I am having a strange problem with CUDA. Any time I use the double type in my kernel function(s), things go very wrong.
If I change 'VARTYPE' to float, everything works.
I am using an 8600GT card, which only has compute capability 1.1. Both in this thread, http://forums.nvidia.com/index.php?showtopic=33079 , and in the CUDA programming guide,
there is talk about a conversion for my device. I was under the impression that this was done automatically and that I did not have to worry about it. Now it seems I do. But no matter what kind of conversions I try in the sample code below, I do not get a sensible answer. What can I do, other than converting everything in my application to floats?
See the sample code below:
#include <stdio.h>
#include <cutil.h>
// Element type shared by the host result array, the device buffer and the
// kernel parameter. Exactly one of the two definitions below should be active.
// NOTE(review): on devices of compute capability below 1.3, nvcc is documented
// to demote double to float in device code, so the element size seen by the
// kernel may not match the host's sizeof(double) - confirm against the CUDA
// programming guide for the toolkit in use.
#define VARTYPE double
//#define VARTYPE float
/*
 * Debug kernel: every thread stores its own flattened in-block index into
 * data[] at that same index.
 *
 * Launch layout: the index is built from threadIdx only (blockIdx is not
 * used), so the kernel is meant to be launched with a single block; with
 * more blocks every block would write the same elements.
 *
 * Preconditions: data must point to device memory holding at least
 * blockDim.x * blockDim.y elements. There is no bounds check.
 */
__global__ void test(VARTYPE *data) {
    // Row-major flattening of the 2D thread index. The row stride is the
    // width of the block (blockDim.x); gridDim.x is the number of blocks and
    // would give a wrong index as soon as the block has more than one row.
    int id = threadIdx.x + threadIdx.y * blockDim.x;
    data[id] = id;
}
#define NB 512  // threads in the single launched block == number of elements

/*
 * Allocates NB elements on the device, lets the test kernel fill them with
 * the thread indices, copies them back and prints one "index value" line per
 * element.
 *
 * Returns 0 on success, 1 if VARTYPE is double-sized but the current device
 * reports a compute capability below 1.3.
 */
int main() {
    // Refuse to run the double build on hardware without double support.
    // NOTE(review): per the CUDA programming guide, double precision needs
    // compute capability 1.3 or higher AND a matching build (e.g. nvcc
    // -arch=sm_13); otherwise device code treats double as float and the
    // host would read the buffer back with the wrong element size. This
    // check only covers the hardware half of that requirement.
    if (sizeof(VARTYPE) == sizeof(double)) {
        int dev = 0;
        cudaDeviceProp prop;
        CUDA_SAFE_CALL(cudaGetDevice(&dev));
        CUDA_SAFE_CALL(cudaGetDeviceProperties(&prop, dev));
        if (prop.major * 10 + prop.minor < 13) {
            fprintf(stderr,
                    "Device %d has compute capability %d.%d; double precision "
                    "requires 1.3 or higher. Rebuild with VARTYPE float.\n",
                    dev, prop.major, prop.minor);
            return 1;
        }
    }

    dim3 blockSize(NB, 1);
    VARTYPE *dbg;
    VARTYPE res[NB];

    CUDA_SAFE_CALL(cudaMalloc((void**)&dbg, sizeof(*dbg) * NB));

    test<<<1, blockSize>>>(dbg);
    // Check the launch right away so a failure is reported under the kernel's
    // label instead of surfacing from the copy below.
    CUT_CHECK_ERROR("1. kernel");
    // Wait for the kernel so execution errors are caught before the copy.
    CUDA_SAFE_CALL(cudaThreadSynchronize());

    CUDA_SAFE_CALL(cudaMemcpy(res, dbg, sizeof(*res) * NB, cudaMemcpyDeviceToHost));

    for (int i = 0; i != NB; i++) printf("%3d %f\n", i + 1, res[i]);

    CUDA_SAFE_CALL(cudaFree(dbg));
    return 0;
}