Multi-file cuda programs

Hi,

I am having a problem with CUDA for C. I am trying to write a program that is split over several files: test.c contains my main program; testcudainv.cu and its header testcudainv.cuh contain the CUDA commands; and testcudakernel.cu and its header testcudakernel.cuh contain the kernel.

I am having problems copying data from and to the device. It compiles perfectly well but I get the following error at runtime:

testcudainv.cu(23) : cudaSafeCall() Runtime API error : invalid argument.

The line it refers to is the first line in the following subroutine.

/*
 * Copy the three host arrays ar_x, ar_y, ar_z into the device arrays
 * dar_x, dar_y, dar_z.
 *
 *   dar_x, dar_y, dar_z  destination pointers (device side)
 *   ar_x,  ar_y,  ar_z   source pointers (host side)
 *   nsize                number of float elements copied per array
 *
 * Every cudaMemcpy is wrapped in cutilSafeCall so that a failing copy is
 * reported rather than silently ignored.
 */
void cudacopyhosttodev(float *dar_x, float *dar_y, float *dar_z, float *ar_x, float *ar_y, float *ar_z, int nsize)
{
    /* All three arrays have the same length, so work out the byte count once. */
    const size_t nbytes = nsize * sizeof(float);

    cutilSafeCall(cudaMemcpy(dar_x, ar_x, nbytes, cudaMemcpyHostToDevice));
    cutilSafeCall(cudaMemcpy(dar_y, ar_y, nbytes, cudaMemcpyHostToDevice));
    cutilSafeCall(cudaMemcpy(dar_z, ar_z, nbytes, cudaMemcpyHostToDevice));
}

dar_x, dar_y, dar_z are the device arrays allocated in a subroutine and the rest are cpu variables declared as pointers and allocated with malloc.

/*
 * Allocate three device buffers, each large enough for nsize floats.
 *
 *   dar_x, dar_y, dar_z  addresses of the caller's pointers; each one is
 *                        handed to cudaMalloc, which stores the new
 *                        allocation through it
 *   nsize                number of float elements per buffer
 *
 * Every cudaMalloc is wrapped in cutilSafeCall so that a failing
 * allocation is reported rather than silently ignored.
 */
void cudaallocate( void **dar_x, void **dar_y, void **dar_z, int nsize)
{
    /* Same size for all three buffers: compute it a single time. */
    const size_t nbytes = nsize * sizeof(float);

    cutilSafeCall(cudaMalloc(dar_x, nbytes));
    cutilSafeCall(cudaMalloc(dar_y, nbytes));
    cutilSafeCall(cudaMalloc(dar_z, nbytes));
}

Any help with the runtime error (testcudainv.cu(23) : cudaSafeCall() Runtime API error : invalid argument) would be greatly appreciated. This works if all the code is in one file, so I know the basic code is right, but I think I may be messing up the declarations in some way.

I have attached the files but I needed to change the names so it would let me. These are just a test case which simply fills an array, copies it across, simple write on the gpu and then write back to the host.

Thanks

Dean

P.S. I couldn’t get the C file to upload, so I have just pasted it below. I haven’t bothered including the header files, as they do nothing but list the subroutines/functions.

#include "header.h"
#include "testcudainv.cuh"

/*
 * Allocate a host array of `count` floats.
 *
 * On allocation failure the error is printed to stderr and the program
 * terminates with a failure exit status, so the caller never sees NULL.
 */
static float *alloc_host_floats(size_t count)
{
    float *buf = (float *)malloc(count * sizeof(float));

    if (buf == NULL) {
        fprintf(stderr, "%s\n", "freaked up, malloc failed");
        exit(EXIT_FAILURE);   /* was exit(0), which reported success */
    }
    return buf;
}

/*
 * Test driver: fills three host arrays, copies them to the device, clears
 * the host copies, copies the device data back and releases everything.
 *
 * Returns 0 on success; terminates with EXIT_FAILURE if a host allocation
 * fails.
 */
int main(int argc, char **argv)
{
    float *ar_x, *ar_y, *ar_z;                          /* host arrays */
    float *dar_x = NULL, *dar_y = NULL, *dar_z = NULL;  /* device arrays, set by cudaallocate */
    int i, nblocks, nthreads, nsize;

    cudadevinit(argc, argv);

    nblocks = 2;
    /*
     * NOTE(review): 3000 is larger than the CUDA per-block thread limit
     * (at most 1024). If forceinvoke uses nthreads as the block size, the
     * launch will fail once it is re-enabled -- confirm against the kernel
     * wrapper.
     */
    nthreads = 3000;
    nsize = nblocks * nthreads;

    /*
     * Size the host arrays with nsize, the same count used by every loop
     * and copy below. The original used an unrelated product (100 * 60)
     * that only happened to equal nsize, so changing either value would
     * have caused a heap overrun.
     */
    ar_x = alloc_host_floats((size_t)nsize);
    ar_y = alloc_host_floats((size_t)nsize);
    ar_z = alloc_host_floats((size_t)nsize);

    cudaallocate((void **)&dar_x, (void **)&dar_y, (void **)&dar_z, nsize);

    /* Fill the host arrays with a recognisable pattern. */
    for (i = 0; i < nsize; i++) {
        ar_x[i] = (float)i;
        ar_y[i] = (float)i;
        ar_z[i] = (float)i;
    }

    cudacopyhosttodev(dar_x, dar_y, dar_z, ar_x, ar_y, ar_z, nsize);

    /* Clear the host copies so the copy back from the device is observable. */
    for (i = 0; i < nsize; i++) {
        ar_x[i] = 0.0f;
        ar_y[i] = 0.0f;
        ar_z[i] = 0.0f;
    }

    // forceinvoke(dar_x, dar_y, dar_z, nblocks, nthreads);

    cudacopydevtohost(dar_x, dar_y, dar_z, ar_x, ar_y, ar_z, nsize);

    /*
    for(i=0;i<nsize;i++){
    printf("%s %d %s %f \n","ar_x [", i,"] = ", ar_x[i]);
    printf("%s %d %s %f \n","ar_y [", i,"] = ", ar_y[i]);
    printf("%s %d %s %f \n","ar_z [", i,"] = ", ar_z[i]);
    }
    */
    cudadeallocate(dar_x, dar_y, dar_z);

    free(ar_x);
    free(ar_y);
    free(ar_z);

    return 0;
}
testcudainv.cu (1.48 KB)
test.cu (2.37 KB)