I am getting wrong results when using the OpenCL builtin function rotate() on a 64-bit integer.
The following minimal program reproduces the error:
#include <CL/opencl.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>
/*
 * OpenCL C source of the test kernel, built at run time by the host program.
 *
 * The kernel writes two values into d_count:
 *   d_count[0] - left-rotation of a 64-bit constant by n = 16, spelled out
 *                with shifts and a bitwise OR (the reference value);
 *   d_count[1] - the same rotation computed by the rotate() built-in.
 */
const char source[] =
    "__kernel void test_rotate(__global ulong *restrict d_count)"
    "{ const ulong n = 16;"
    " d_count[0] = (2004413935125273123UL << n)"
    " | (2004413935125273123UL >> (64 - n));"
    " d_count[1] = rotate(2004413935125273123UL, n);}";
int main()
{
cl_platform_id platform;
cl_device_id device;
cl_context context;
cl_command_queue queue;
cl_program program;
cl_mem mem;
cl_kernel kernel;
clGetPlatformIDs(1, &platform, NULL);
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL);
context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
queue = clCreateCommandQueue(context, device, 0, NULL);
const char *sources[1] = {source};
program = clCreateProgramWithSource(context, 1, sources, NULL, NULL);
clBuildProgram(program, 1, &device, NULL, NULL, NULL);
mem = clCreateBuffer(context, CL_MEM_READ_WRITE, 2*sizeof(cl_ulong), NULL, NULL);
kernel = clCreateKernel(program, "test_rotate", NULL);
clSetKernelArg(kernel, 0, sizeof(mem), &mem);
const size_t work_size[1] = {1};
clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, work_size, 0, NULL, NULL);
cl_ulong *buf = (cl_ulong *)clEnqueueMapBuffer(queue, mem, true, CL_MAP_READ, 0, 2*sizeof(cl_ulong), 0, NULL, NULL, NULL);
printf("expected: 0x%"PRIX64"\n", buf[0]);
printf("rotate(): 0x%"PRIX64"\n", buf[1]);
clEnqueueUnmapMemObject(queue, mem, buf, 0, NULL, NULL);
return 0;
}
The program was compiled with:
gcc -Wall -O2 -o rotate rotate.c -lOpenCL
The tests were run using a Tesla C2070 on CentOS 6.6 x86_64.
With NVIDIA driver version 346.59 (OpenCL 1.1 CUDA 7.0.35), rotate() returns a wrong result:
expected: 0x1BDAA9FC1A231BD1
rotate(): 0x1BDAC5CD35FDA9FC
With NVIDIA driver version 331.67 (OpenCL 1.1 CUDA 6.0.1), rotate() returns the correct result:
expected: 0x1BDAA9FC1A231BD1
rotate(): 0x1BDAA9FC1A231BD1
I also tested different bit shifts with NVIDIA driver version 346.59.
rotate() returns the wrong result for n = 1…31, while it is correct for n = 32…63.