I have the following code:
__global __ void zeros(int * in, int * out, int width, int height, int channels) {
int ii;
int xIndex = threadIdx.x + blockIdx.x*blockDim.x;
int yIndex = threadIdx.y + blockIdx.y*blockDim.y;
int index = channels*(xIndex + yIndex*width);
if (xIndex < width && yIndex < height) {
for (ii = 0; ii < channels; ii++)
out[index+ii] = 0;
}
}
set the block and grid dim
dim3 CUDA_BlockDimensions;
dim3 CUDA_GridDimensions;
CUDA_BlockDimensions.x = 16;
CUDA_BlockDimensions.y = 16;
CUDA_GridDimensions.x = 32;
CUDA_GridDimensions.y = 32;
And launch it using:
cudaGetLastError();
zeros<<<CUDA_BlockDimensions, CUDA_GridDimensions>>>(CUDA_input1, CUDA_output1, input2, input3, input4);
CUDALink_Err = cudaGetLastError();
if( CUDA_Err != cudaSuccess)
{
funStruct->Message("launch");
printf("BlockDimensions x=%d, y=%d, z=%d\n", CUDA_BlockDimensions.x, CUDA_BlockDimensions.y, CUDA_BlockDimensions.z);
printf("GridDimensions x=%d, y=%d, z=%d\n", CUDA_GridDimensions.x, CUDA_GridDimensions.y, CUDA_GridDimensions.z);
printf(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n");
printf("ERRROR on line %d in %s:%s\n", __LINE__, __FILE__, __func__);
printf("CUDA Error %s\n", cudaGetErrorString(CUDA_Err));
printf("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n");
return dllErr;
}
Here is the result from the printf:
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
BlockDimensions x=16, y=16, z=1
GridDimensions x=32, y=32, z=1
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
ERRROR on line 136 in CUDALink_CUDAZero
CUDA Error invalid configuration argument
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
the block and grid dimensions seem reasonable to me.
thanks