First, I must apologize for my poor English.
I wrote a test application to measure single-threaded CUDA performance and compared it with the CPU. The results are shown below. They show that the GPU is very slow compared with the CPU when both use only a single thread. Are these results correct?
CPU: Pentium D 2.8x2 (only one thread is used)
GPU: GF9600GT
All code was compiled with VS2005.
-
int i, j, k = 0; for(j = 0; j < 1000000;) { k += j++; k += j++; k += j++; k += j++; k += j++; k += j++; k += j++; k += j++; k += j++; k += j++; }
CPU(Debug): 0ms, CPU(Release): 0ms, GPU: 25.8ms
-
int i, j, k = 0; for(j = 0; j < 10000000;) { k += j++; k += j++; k += j++; k += j++; k += j++; k += j++; k += j++; k += j++; k += j++; k += j++; }
CPU(Debug): 31ms, CPU(Release): 0ms, GPU:256ms
-
int i, j, k = 0; for(j = 1; j < 1000000;) { k *= j++; k *= j++; k *= j++; k *= j++; k *= j++; k *= j++; k *= j++; k *= j++; k *= j++; k *= j++; }
CPU(Debug): 0ms, CPU(Release): 0ms, GPU:4.8ms
-
int i, j, k = 0; for(j = 1; j < 10000000;) { k *= j++; k *= j++; k *= j++; k *= j++; k *= j++; k *= j++; k *= j++; k *= j++; k *= j++; k *= j++; }
CPU(Debug): 62ms, CPU(Release): 31ms, GPU: 45.6ms
-
int i, j, k = 0; for(j = 1; j < 100000000;) { k *= j++; k *= j++; k *= j++; k *= j++; k *= j++; k *= j++; k *= j++; k *= j++; k *= j++; k *= j++; }
CPU(Debug): 545ms, CPU(Release): 374ms, GPU: 451.7ms
-
int i, j, k = 0; for(j = 1; j < 10000000; j++) { k = j; k >>= 1; k >>= 2; k >>= 3; k >>= 4; k >>= 5; k >>= 6; k >>= 7; k >>= 8; }
CPU(Debug): 124ms, CPU(Release): 0ms, GPU: 503ms