Hi all,

I am having trouble getting a kernel to work. I keep getting an "invalid argument" error and have no clue what the reason could be. Below is the code:

```
// Host-side fragment: uploads a uint4 table converted from a MATLAB array,
// launches generate_XYZ over it, then launches check_for_special on the
// same device buffers.
const uint threads_per_block = 32;
uint num_total = 1033;

// Integer ceiling division: number of blocks needed to cover num_total
// elements. Replaces the float/double ceil() round-trips; same result (33).
// NOTE(review): this launches total_dim * threads_per_block = 1056 threads
// for buffers holding num_total = 1033 elements, and neither kernel is
// passed num_total -- confirm the kernels guard the tail threads.
uint total_dim = (num_total + threads_per_block - 1) / threads_per_block;

// Stage the MATLAB input in a temporary host buffer, then copy it to the device.
uint4 *temp_u4, *g_total;
temp_u4 = (uint4 *) mxMalloc(num_total*sizeof(uint4));
convert_matlab2uint4(mxGetPr(m_total), temp_u4, num_total, num_total);
CUDA_SAFE_CALL( cudaMalloc( (void **) &g_total, num_total*sizeof(uint4)));
CUDA_SAFE_CALL( cudaMemcpy( g_total, temp_u4, num_total*sizeof(uint4), cudaMemcpyHostToDevice));
mxFree(temp_u4);

// Device output buffers for the first kernel, one float4 per input element.
float4 *g_X_arr, *g_Y_arr, *g_Z_arr;
CUDA_SAFE_CALL( cudaMalloc( (void **) &g_X_arr, num_total*sizeof(float4)));
CUDA_SAFE_CALL( cudaMalloc( (void **) &g_Y_arr, num_total*sizeof(float4)));
CUDA_SAFE_CALL( cudaMalloc( (void **) &g_Z_arr, num_total*sizeof(float4)));
generate_XYZ<<<total_dim, threads_per_block>>>(g_total, g_X_arr, g_Y_arr, g_Z_arr);
CUT_CHECK_ERROR("An error occured"); // This kernel happily runs

// Host and device buffers for the second kernel's per-element output.
uint *special, *g_special;
special = (uint *) mxMalloc(num_total*sizeof(uint));
CUDA_SAFE_CALL( cudaMalloc( (void **) &g_special, num_total*sizeof(uint)));

// Single device-side counter, zeroed before the launch.
uint *g_num_special;
CUDA_SAFE_CALL( cudaMalloc( (void **) &g_num_special, 1*sizeof(uint)));
// Checked like every other runtime call: a failure here would otherwise go
// unnoticed and would presumably be reported by the error check that follows
// the kernel launch below, where it looks like a launch failure.
CUDA_SAFE_CALL( cudaMemset(g_num_special, 0, 1*sizeof(uint)));

// Debug print of the launch configuration and raw device addresses.
// Pointers are widened explicitly and printed with %llu (decimal, as before);
// passing a pointer to %d is undefined and truncates on 64-bit hosts.
fprintf(stderr, "<<<%u, %u>>>(%llu, %llu, %llu, %llu, %llu, %llu)\n",
        total_dim, threads_per_block,
        (unsigned long long)(size_t) g_total,
        (unsigned long long)(size_t) g_num_special,
        (unsigned long long)(size_t) g_special,
        (unsigned long long)(size_t) g_X_arr,
        (unsigned long long)(size_t) g_Y_arr,
        (unsigned long long)(size_t) g_Z_arr);
check_for_special<<<total_dim, threads_per_block>>>(g_total, g_num_special, g_special, g_X_arr, g_Y_arr, g_Z_arr);
CUT_CHECK_ERROR("An error occured"); // This is line 113 in the reported error
```

The fprintf call prints: <<<33, 32>>>(61734912, 16805376, 61997056, 61800448, 61865984, 61931520)

And the error check right after the launch gives me:

Cuda error: An error occured in file ‘total_kernel.cu’ in line 113 : invalid argument.

So the kernel is not even starting. All the previous posts on this subject are about memcpy3D, and the FAQ only says that the size of the arguments might be the problem, but as far as I can see I only have 6 pointers as input/output…

If anybody can shed some light on this, I would be very happy!