Limit on clEnqueueNDRangeKernel calls: error -6 when enqueuing in a loop


I tried to run a kernel in a loop. After 2042 iterations, clEnqueueNDRangeKernel returns -6 (CL_OUT_OF_HOST_MEMORY).

The same operation with CUDA gives no problems.


             // Launches the "kernelLoad" kernel up to 100000 times, reading buffer A back
             // to the host and logging the status and execution time on every iteration.
             // NOTE(review): this excerpt ends before the loop's closing brace.
             for(int i=0; i <100000 ; i++) {

            // A kernel object is created on every iteration; the status is written to ci_kernel.
            // NOTE(review): no clReleaseKernel is visible in this excerpt - confirm the kernel
            // is released, or create it once before the loop.
            my_kernel = clCreateKernel(my_program, "kernelLoad", &ci_kernel);

            // NOTE(review): this tests ci_error, but clCreateKernel reported into ci_kernel
            // (which is what gets printed), so a creation failure may go unnoticed here.
            if (ci_error != CL_SUCCESS) {fprintf(logout,"Error number %i\n", ci_kernel);}

// Set kernel parameters: arguments 0-3 are the device buffers A, B, C and K.
// The return values of clSetKernelArg are not checked.

            clSetKernelArg(my_kernel, 0, sizeof(cl_mem), (void *)&hDeviceMemA);

            clSetKernelArg(my_kernel, 1, sizeof(cl_mem), (void *)&hDeviceMemB);

            clSetKernelArg(my_kernel, 2, sizeof(cl_mem), (void *)&hDeviceMemC);

            clSetKernelArg(my_kernel, 3, sizeof(cl_mem), (void *)&hDeviceMemK);


            // Enqueue a 1-dimensional launch with no wait list. Passing &device_execution
            // asks the runtime to return an event object for this command.
            // NOTE(review): no clReleaseEvent(device_execution) is visible in this excerpt;
            // a new event is returned each iteration, and the thread's resolution is that
            // releasing the event makes the -6 error go away.
            ci_error = clEnqueueNDRangeKernel(my_command_queue, my_kernel,1, 0 ,globalsize, localsize, 0, 0, &device_execution);

            fprintf(logout,"Kernel status exit  %i loop %i \n", ci_error, i);

            // -6 is the value the post identifies as CL_OUT_OF_HOST_MEMORY.
            // NOTE(review): exit(0) reports success to the caller even though this is a failure path.
            if(ci_error == -6) exit(0);

// Copy results from device back to host: a blocking read (CL_TRUE) of
// length * sizeof(cl_float) bytes from buffer A into pA.

            ci_error =

                    clEnqueueReadBuffer(my_command_queue, hDeviceMemA, CL_TRUE, 0,length * sizeof(cl_float),pA, 0, 0, 0);

            if (ci_error != CL_SUCCESS) {fprintf(logout,"Error number %i\n", ci_error);}


            // utils_execution_time is not shown here; presumably it derives the elapsed
            // time in milliseconds from the event - TODO confirm.
            fprintf(logout,"KERNEL LOAD exec in Time %.3f ms\n",utils_execution_time (device_execution));


Note: I tried this on a GTX 275 and a Tesla C1060.

I had the same problem. This error occurs because of the cl_event (device_execution) parameter in the clEnqueueNDRangeKernel call.
Simply replace &device_execution with NULL and it should work.

Edit** withdrawn


If you release the cl_event object (with clReleaseEvent) after each iteration, everything works.