Hello,
I tried to run a kernel in a loop. After 2042 iterations, clEnqueueNDRangeKernel returns the error code -6 (CL_OUT_OF_HOST_MEMORY).
The same operation with CUDA gives no problems.
[codebox]
for(int i=0; i <100000 ; i++) {
my_kernel = clCreateKernel(my_program, "kernelLoad", &ci_kernel);
if (ci_error != CL_SUCCESS) {fprintf(logout,"Error number %i\n", ci_kernel);}
//Set kernek param
clSetKernelArg(my_kernel, 0, sizeof(cl_mem), (void *)&hDeviceMemA);
clSetKernelArg(my_kernel, 1, sizeof(cl_mem), (void *)&hDeviceMemB);
clSetKernelArg(my_kernel, 2, sizeof(cl_mem), (void *)&hDeviceMemC);
clSetKernelArg(my_kernel, 3, sizeof(cl_mem), (void *)&hDeviceMemK);
//RUN KERNEL & GET OUTPUT
ci_error = clEnqueueNDRangeKernel(my_command_queue, my_kernel,1, 0 ,globalsize, localsize, 0, 0, &device_execution);
fprintf(logout,"Kernel status exit %i loop %i \n", ci_error, i);
if(ci_error == -6) exit(0);
// copy results from device back to host
ci_error =
clEnqueueReadBuffer(my_command_queue, hDeviceMemA, CL_TRUE, 0,length * sizeof(cl_float),pA, 0, 0, 0);
if (ci_error != CL_SUCCESS) {fprintf(logout,"Error number %i\n", ci_error);}
fprintf(logout,"\n");
fprintf(logout,"KERNEL LOAD exec in Time %.3f ms\n",utils_execution_time (device_execution));
}[/codebox]
Note: I tried this on a GTX 275 and a Tesla C1060.