Error code -54 on clEnqueueNDRangeKernel

Hi,

I’ve been porting some code from CUDA to OpenCL, and I get error code -54 from clEnqueueNDRangeKernel when I try to run it. The program and the kernel both build fine; the failure only happens at runtime. Here is the info I feel is relevant, but let me know if anything else is needed.

[codebox]size_t localWorkSize = {16, 4, 4};

size_t globalWorkSize = {27, 45 * 38, 1};[/codebox]

[codebox]error |= clSetKernelArg(fdk_kernel, 0, sizeof(cl_mem), (void *) &dev_vol);

error |= clSetKernelArg(fdk_kernel, 1, sizeof(int2), (void *) &kargs->img_dim);

error |= clSetKernelArg(fdk_kernel, 2, sizeof(float2), (void *) &kargs->ic);

error |= clSetKernelArg(fdk_kernel, 3, sizeof(float4), (void *) &kargs->nrm);

error |= clSetKernelArg(fdk_kernel, 4, sizeof(float), &kargs->sad);

error |= clSetKernelArg(fdk_kernel, 5, sizeof(float), &kargs->scale);

error |= clSetKernelArg(fdk_kernel, 6, sizeof(float4), (void *) &kargs->vol_offset);

error |= clSetKernelArg(fdk_kernel, 7, sizeof(int4), (void *) &kargs->vol_dim);

error |= clSetKernelArg(fdk_kernel, 8, sizeof(float4), (void *) &kargs->vol_pix_spacing);

error |= clSetKernelArg(fdk_kernel, 9, sizeof(int), &blocksInY);

error |= clSetKernelArg(fdk_kernel, 10, sizeof(cl_float), &invBlocks_Y);

error |= clSetKernelArg(fdk_kernel, 11, sizeof(cl_mem), (void *) &dev_img);

error |= clSetKernelArg(fdk_kernel, 12, sizeof(cl_mem), (void *) &dev_matrix);[/codebox]

[codebox]__kernel void kernel_fdk(

__global float *dev_vol, 

__global int2 img_dim, 

__global float2 ic, 

__global float4 nrm, 

__global float sad, 

__global float scale, 

__global float4 vol_offset, 

__global int4 vol_dim, 

__global float4 vol_pix_spacing, 

__global uint Blocks_Y, 

__global float invBlocks_Y,

__global float *dev_matrix,

__global float *dev_img

)[/codebox]

The system errors out when trying to run the kernel using the following command:

[codebox]error = clEnqueueNDRangeKernel(command_queue, fdk_kernel, 3, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);[/codebox]

In C, since int2, int4, float2 and float4 don’t exist, I created the following:

[codebox]struct int2 {

int x;

int y;

};

struct int4 {

int x;

int y;

int z;

int w;

};

struct float2 {

float x;

float y;

};

struct float4 {

float x;

float y;

float z;

float w;

};[/codebox]

Error -54 is CL_INVALID_WORK_GROUP_SIZE: the values in your “globalWorkSize” array are not evenly divisible by the corresponding values in your “localWorkSize” array. In your case 27 is not a multiple of 16, 45 * 38 = 1710 is not a multiple of 4, and 1 is not a multiple of 4. Remember that the global work size is the total number of threads along each dimension, not the size of the “block” of threads along the corresponding dimension.