Very small work group size

I recently got an NVidia GTX 970 card and am testing some OpenCL code from my work. However, I notice that I can only get a maximum work group size of 13 (CL_DEVICE_MAX_WORK_GROUP_SIZE=13). I have the latest CUDA 7.0. Both the stable 346.59 driver and the beta 349.12 driver show the same results under OpenCL. I'm on Debian Linux. Could somebody help?

CL_DEVICE_NAME: GeForce GTX 970
CL_DEVICE_VENDOR: NVIDIA Corporation
CL_DRIVER_VERSION: 349.12
CL_DEVICE_VERSION: OpenCL 1.2 CUDA
CL_DEVICE_MAX_COMPUTE_UNITS: 13
CL_DEVICE_MAX_WORK_GROUP_SIZE: 13
CL_DEVICE_MAX_WORK_ITEM_SIZES: (0 0 0)

Here is the clGetDeviceInfo code:

int queryInt;
int queryDim[3] = {0,0,0};
cl_int clError;
clError = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(queryBuffer), &queryBuffer, NULL);
printf("CL_DEVICE_NAME: %s\n", queryBuffer);
queryBuffer[0] = '\0';
clError = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(queryBuffer), &queryBuffer, NULL);
printf("CL_DEVICE_VENDOR: %s\n", queryBuffer);
queryBuffer[0] = '\0';
clError = clGetDeviceInfo(device, CL_DRIVER_VERSION, sizeof(queryBuffer), &queryBuffer, NULL);
printf("CL_DRIVER_VERSION: %s\n", queryBuffer);
queryBuffer[0] = '\0';
clError = clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(queryBuffer), &queryBuffer, NULL);
printf("CL_DEVICE_VERSION: %s\n", queryBuffer);
queryBuffer[0] = '\0';
clError = clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(int), &queryInt, NULL);
printf("CL_DEVICE_MAX_COMPUTE_UNITS: %d\n", queryInt);
clError = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(int), &queryInt, NULL);
printf("CL_DEVICE_MAX_WORK_GROUP_SIZE: %d\n", queryInt);
clError = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, 3*sizeof(int), queryDim, NULL);
printf("CL_DEVICE_MAX_WORK_ITEM_SIZES: (%d %d %d)\n", queryDim[0], queryDim[1], queryDim[2]);

My guess is that the 13 is left over from the previous call, which set queryInt to the number of compute units.

You don't appear to be doing proper error checking. Do you get an error reported in clError from this line:

clError = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(int), &queryInt, NULL);

Thank you. I indeed got an error querying CL_DEVICE_MAX_WORK_GROUP_SIZE

clGetDeviceInfo -30(CL_INVALID_VALUE)

I wonder why I got this error. Everything before that was ok. My code is like this

clErr = clGetDeviceIDs( platforms[0], CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
device_list = (cl_device_id *)malloc(sizeof(cl_device_id)*num_devices);
clErr = clGetDeviceIDs( platforms[0], CL_DEVICE_TYPE_GPU, num_devices, device_list, NULL);
if (clErr ) {
        printf("ERROR: clGetDeviceIDs %d(%s)\n", clErr , get_CL_error_string(clErr ));
}
PrintDeviceInfo(device_list[0]);

and the PrintDeviceInfo is listed in my original post

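I googled more about the error. Turns out CL_DEVICE_MAX_WORK_GROUP_SIZE returns a size_t (not an int), and CL_DEVICE_MAX_WORK_ITEM_SIZES returns size_t[3]. Passing sizeof(int) as the buffer size is too small for a size_t on 64-bit Linux, hence the CL_INVALID_VALUE. Pretty odd.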

Since a work group can be 3-dimensional, I think it makes sense that the maximum work-item sizes (CL_DEVICE_MAX_WORK_ITEM_SIZES) are returned per dimension: each of the 3 dimensions has its own maximum.