Hello,
I’m quite new to CUDA, though I have good background in programming.
I’m working on a project where I use matrices of increasingly smaller size. That’s how it looks like:
// host
float *u[MAX_GRID_LEVELS];
float *v[MAX_GRID_LEVELS];
// device
float *uCUDA[MAX_GRID_LEVELS];
float *vCUDA[MAX_GRID_LEVELS];
int nx[MAX_GRID_LEVELS];
int ny[MAX_GRID_LEVELS];
int levels = 0;
// ... initialization stuff ...
nx[levels] = 65;
nx[levels] = 65;
// allocate memory on CPU for each grid
// and initialize to 0
while(( nx[levels] > 2 ) && (ny[levels] > 2) && (levels < MAX_GRID_LEVELS))
{
size_t sizeCPU = nx[levels] * ny[levels] * sizeof(float);
u[levels] = new float[ sizeCPU ];
v[levels] = new float[ sizeCPU ];
memset( u[levels], 0, sizeCPU );
memset( v[levels], 0, sizeCPU );
nx[levels+1] = (nx[levels] + 1)/2;
ny[levels+1] = (ny[levels] + 1)/2;
levels++;
}
// Allocate memory on GPU for each grid
// and initialize grids with 0.
for(int k = 0; k <= levels; k++) {
size_t sizeGPU = nx[k] * ny[k] * sizeof(float);
cudasafe( cudaMalloc( (void**) &uCUDA[k], sizeGPU ), "cudaMallocU" );
cudasafe( cudaMalloc( (void**) &vCUDA[k], sizeGPU ), "cudaMallocV" );
cudasafe( cudaMemcpy( uCUDA[k], u[k], sizeGPU, cudaMemcpyHostToDevice ), "cudaMemcpyU" );
cudasafe( cudaMemcpy( vCUDA[k], v[k], sizeGPU, cudaMemcpyHostToDevice ), "cudaMemcpyV" );
cudasafe( cudaMemset( *uCUDA, 0, sizeGPU ), "cudaMemsetU" );
cudasafe( cudaMemset( *vCUDA, 0, sizeGPU ), "cudaMemsetV" );
}
cudasafe is a small routine used to catch and print any cudaError generated
void cudasafe( cudaError_t error, char *message)
Well, the cudaMalloc doesn’t report any error, while I get a CUDA error 11 (cudaErrorInvalidValue) on the first cudaMemcpy.
I can’t figure out why. is the way I allocate memory for each grid correct? could that be the cause of the error?
Please, any help would be appreciated