OpenCL CL_INVALID_COMMAND_QUEUE issue

I would appreciate it if somebody could help. I have a small kernel that always finishes with a CL_INVALID_COMMAND_QUEUE error. I’ve tried it on different hardware (a GTX 765M and a GTX 980), and the result is the same.
Any ideas? Here is the code (host + kernel):

//get all platforms (drivers)
std::vectorcl::Platform all_platforms;
cl::Platform::get(&all_platforms);
if(all_platforms.size()==0){
std::cout<<" No platforms found. Check OpenCL installation!\n";
exit(1);
}
cl::Platform default_platform=all_platforms[1];
std::cout << “Using platform: “<<default_platform.getInfo<CL_PLATFORM_NAME>()<<” \n”;

//get default device of the default platform
std::vectorcl:evice all_devices;
default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
if(all_devices.size()==0){
std::cout<<" No devices found. Check OpenCL installation!\n";
exit(1);
}
cl:evice default_device=all_devices[0];
std::cout<< “Using device: “<<default_device.getInfo<CL_DEVICE_NAME>()<<” \n”;

cl::Context context({default_device});

cl::Program::Sources sources;

std::string kernel_code=
“__kernel void test(__global float* A,__global float* R) {”
“int i = get_global_id(0);”
“if(i>=1075021) return;”
“if(i<60000) {”
“R[i]=0;”
“return;”
“};”
“float vm=0.f;”
“for(int j=i-60000;j<=i;++j)”
“vm+=A[j];”
“R[i]=vm;”
“};”;

sources.push_back({kernel_code.c_str(),kernel_code .length()});

cl::Program program(context,sources);
if(program.build({default_device})!=CL_SUCCESS){
std::cout<<" Error building: “<<program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(defa ult_device)<<”\n";
exit(1);
}

size_t n=1075021;
// create buffers on the device
cl::Buffer buffer_A(context,CL_MEM_READ_WRITE,sizeof(float)*n );
cl::Buffer buffer_R(context,CL_MEM_READ_WRITE,sizeof(float)*n );

float *A = new float[n];
float *R = new float[n];

srand (time(NULL));

for(size_t i=0;i<n;++i)
A[i]=rand()%10;

cl::CommandQueue queue(context,default_device);

cl_int ret;
ret=queue.enqueueWriteBuffer(buffer_A,CL_TRUE,0,si zeof(float)*n,A);
ret=queue.finish();

cl::Kernel kernel_test=cl::Kernel(program,“test”);
kernel_test.setArg(0,buffer_A);
kernel_test.setArg(1,buffer_R);

size_t max_work_size=1024;
size_t num_work_groups = (n-1) / max_work_size + 1;
size_t global_size_padded = num_work_groups * max_work_size;

queue.enqueueNDRangeKernel(kernel_test,cl::NullRan ge,cl::NDRange(global_size_padded),cl::NDRange(max _work_size));
ret=queue.finish();

ret=queue.enqueueReadBuffer(buffer_R,CL_TRUE,0,siz eof(float)*n,R);

I can’t compile your code, it is incomplete and has various typographical errors. You’ll note that cl::Device is garbled wherever you have it. It may help if you post code using the code markers which are available in the toolbar above your edit window.

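Anyway, my guess is simply that you are running into a WDDM TDR (Timeout Detection and Recovery) timeout on Windows: when a kernel keeps the GPU busy for longer than the watchdog allows, the driver is reset and later calls on the queue fail. You might want to search for that and adjust the timeout.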