Invalid Device Function - Using doubles on Tesla C1060

Hi All,

I’ve been trying to run the following code on a Tesla C1060. I know that it supports double precision, but after compiling with ‘nvcc -arch=sm_13 file.cu’ I get the error ‘Invalid Device Function’. What could be the problem? Do I need to add more options when compiling? It does seem to run in emulation mode.

Thanks,

Vandhan

[codebox]

//cudacomplex is basically a struct with 4 doubles

//kernel

/*
 * cholesky - scratch kernel that exercises double-precision arithmetic on
 * cudacomplex values.
 *
 * gpurows: device pointer to at least three cudacomplex elements; indices
 *          0, 1 and 2 are all written.
 *
 * There is no thread indexing or bounds guard: every thread that runs this
 * kernel performs the same writes, so launch it with a single thread
 * (<<<1,1>>>).
 *
 * The execution-space qualifier must be spelled __global__ (two underscores
 * on each side); a bare "global" is not a qualifier and the function cannot
 * be launched as a kernel.
 *
 * Uses double, so it has to be built for a double-capable target
 * (-arch=sm_13 or newer).
 */
__global__ void cholesky(cudacomplex *gpurows)
{
    // Bump each of the four fields of element 0 by one.
    gpurows[0].real += 1;
    gpurows[0].row += 1;
    gpurows[0].img += 1;
    gpurows[0].column += 1;

    // Overwrite element 1 from a raw array of four doubles.
    // NOTE(review): relies on cudacomplex accepting assignment from double*
    // in device code - confirm against the cudacomplex definition.
    double b[4] = {9.3, 3.6, 9.3, 3.2};
    gpurows[1] = b;

    // Element 2 receives complexsqrt() of element 1.
    // NOTE(review): complexsqrt is defined outside this snippet and must be
    // callable from device code - confirm it is marked __device__.
    gpurows[2] = complexsqrt(gpurows[1]);
}

/* Returns true when err is cudaSuccess; otherwise reports the failed call on stderr. */
static bool cudaCallOk(cudaError_t err, const char *what)
{
    if (err == cudaSuccess)
        return true;
    fprintf(stderr, "%s failed - %s\n", what, cudaGetErrorString(err));
    return false;
}

/*
 * Host driver: fills three cudacomplex values on the host, copies them to the
 * device, runs the cholesky kernel on a single thread and copies the result
 * back into the host buffer.
 *
 * Returns 0 when every CUDA call and the kernel launch succeeded, 1 otherwise.
 */
int main()
{
    //row - column, real , imaginary
    const int count = 3;
    const size_t bytes = count * sizeof(cudacomplex);

    cudacomplex *gpurows = 0;                        // device buffer
    cudacomplex *gpurows2 = new cudacomplex[count];  // host buffer

    // Every host element starts from the same four doubles.
    double a[4] = {1.0, 2.0, 3.0, 4.0};
    for (int i = 0; i < count; ++i)
        gpurows2[i] = a;

    int status = 1;

    // A plain linear allocation is enough for a 1-D buffer. The device
    // pointer is only ever assigned by cudaMalloc, so no host allocation is
    // leaked by overwriting it.
    if (cudaCallOk(cudaMalloc((void**) &gpurows, bytes), "cudaMalloc") &&
        cudaCallOk(cudaMemcpy(gpurows, gpurows2, bytes, cudaMemcpyHostToDevice),
                   "cudaMemcpy (host to device)"))
    {
        cholesky<<<1,1>>>(gpurows);

        // Launch-time failures are reported here.
        // NOTE(review): "invalid device function" at this point usually means
        // the binary holds no kernel image usable by the active device -
        // confirm the -arch value matches the GPU that is actually selected.
        cudaError_t err = cudaGetLastError();
        fprintf(stderr, "Kernel Error - %s\n", cudaGetErrorString(err));
        fflush(stderr);

        // The blocking copy waits for the kernel to finish, so its return
        // code also surfaces errors raised while the kernel was running.
        if (err == cudaSuccess &&
            cudaCallOk(cudaMemcpy(gpurows2, gpurows, bytes, cudaMemcpyDeviceToHost),
                       "cudaMemcpy (device to host)"))
        {
            status = 0;
        }
    }

    cudaFree(gpurows);  // no-op when the allocation never happened (pointer is 0)
    delete[] gpurows2;

    return status;
} [/codebox]