CL_OUT_OF_RESOURCES on clFinish()

I’m getting a CL_OUT_OF_RESOURCES error trying to run this code:

cl_mem ocl_rays	= clCreateBuffer  (ocl_context, CL_MEM_READ_ONLY , sizeof(ray_t)*ray_count, NULL, &err);

err = clEnqueueWriteBuffer(ocl_cmd_queue, ocl_rays, CL_TRUE, 0, sizeof(ray_t)*ray_count, local_rays, 0, NULL, NULL);

err=clFinish(ocl_cmd_queue);

cl_mem ocl_return	= clCreateBuffer  (ocl_context, CL_MEM_WRITE_ONLY, ray_count*sizeof(ocl_isect), NULL, &err);

err |= clSetKernelArg(ocl_kernel, 0, sizeof(cl_mem), (void *)&triangles_gpu_mem);

err |= clSetKernelArg(ocl_kernel, 1, sizeof(cl_mem), (void *)&ocl_rays);

err |= clSetKernelArg(ocl_kernel, 2, sizeof(cl_mem), (void *)&ocl_return);

err |= clSetKernelArg(ocl_kernel, 3, sizeof(unsigned int), (void *)&triangles_count);

err |= clSetKernelArg(ocl_kernel, 4, sizeof(unsigned int), (void *)&original_count);

err=clFinish(ocl_cmd_queue);

//err = clEnqueueNDRangeKernel(ocl_cmd_queue, ocl_kernel, dim, NULL, dims,local_dims, 0, NULL, NULL);

//err = clEnqueueNDRangeKernel(ocl_cmd_queue, ocl_kernel, dim, NULL, dims,NULL, 0, NULL, NULL);

printf("%d\n",err);

err = clFinish(ocl_cmd_queue);

err=clEnqueueReadBuffer(ocl_cmd_queue, ocl_return, CL_TRUE, 0,sizeof(ocl_isect)*ray_count,ris, 0, 0, 0);

err=clFinish(ocl_cmd_queue);

err=clReleaseMemObject(ocl_return);

err=clReleaseMemObject(ocl_rays);

The error is returned by the clFinish() call just after the clEnqueueNDRangeKernel…

I call this code with 100000 work items, in a [100,100,10] global dimension matrix, and I’ve tried setting the work-group dimensions both to a NULL pointer (which should let the implementation choose the sizes) and to a fixed [2,5,1].

This function:

/* Query CL_KERNEL_WORK_GROUP_SIZE for ocl_kernel on ocl_device; the answer is stored in local_work_size. */
err= clGetKernelWorkGroupInfo(ocl_kernel,ocl_device,CL_KERNEL_WORK_GROUP_SIZE ,sizeof(size_t),(void *)&local_work_size,NULL);

returns a local_work_size of 192, so I think I’m well within the hardware/implementation limits…

I suppose the OpenCL code is OK, as I’ve tried running it with the AMD OpenCL CPU implementation and it works just fine…

Everything I found about the CL_OUT_OF_RESOURCES error points to

  1. sampler problems (I don’t use any — or am I missing something?)

  2. wrong/invalid/??? work-group dimensions

Any idea???

PS:

the hardware is a Quadro FX 570M;

the kernel code is:

/*
 * Test one ray against one triangle and, when the ray hits it closer than
 * the hit already stored in *ris, overwrite *ris with the new hit.
 *
 * first_idx : value stored in ris->idx when this triangle becomes the hit
 * ris       : current best hit; ris->distance is the upper bound for t
 * ray       : ray with origin o and direction d
 * triangle  : triangle given as vertex v0 and the vectors e1, e2
 *
 * *ris is left untouched when the determinant is within EPSILON of zero,
 * when u or v fall outside the triangle, or when t is negative or larger
 * than ris->distance.
 */
void isect_ray_triangle(unsigned int first_idx, ocl_isect *ris, ray_t ray, triangle_t triangle) {
	const float4 origin_to_v0 = ray.o - triangle.v0;
	const float4 dir_x_e2     = cross(ray.d, triangle.e2);
	const float  determinant  = dot(triangle.e1, dir_x_e2);

	/* Determinant too close to zero: no usable intersection. */
	if (fabs(determinant) < EPSILON) {
		return;
	}

	const float inv_det = 1.0f / determinant;

	/* First barycentric coordinate. */
	const float bary_u = dot(origin_to_v0, dir_x_e2) * inv_det;
	if (bary_u < 0.0f || bary_u > 1.0f) {
		return;
	}

	/* Second barycentric coordinate. */
	const float4 tvec_x_e1 = cross(origin_to_v0, triangle.e1);
	const float  bary_v    = dot(ray.d, tvec_x_e1) * inv_det;
	if (bary_v < 0.0f || (bary_u + bary_v) > 1.0f) {
		return;
	}

	/* Ray parameter of the hit; keep it only if it beats the stored one. */
	const float hit_t = dot(triangle.e2, tvec_x_e1) * inv_det;
	if (hit_t < 0.0f || hit_t > ris->distance) {
		return;
	}

	ris->idx      = first_idx;
	ris->distance = hit_t;
	ris->u        = bary_u;
	ris->v        = bary_v;
}

/*
 * Brute-force ray casting: each work-item handles one ray and tests it
 * against all tcount triangles, storing the nearest hit in results.
 *
 * triangles  : tcount triangles
 * rays       : rays_count rays
 * results    : rays_count output records, one per ray
 * tcount     : number of entries in triangles
 * rays_count : number of entries in rays / results
 *
 * The work-item's linear index is derived from the actual launch size, so
 * any 1D/2D/3D global range whose total size is >= rays_count covers every
 * ray exactly once.  (The previous hard-coded 256x256 pitch only produced
 * contiguous indices for a 256x256xN launch.)  Surplus work-items exit
 * without touching memory.
 *
 * A ray that hits nothing gets distance = 999999.0f, idx = 0xFFFFFFFF and
 * u = v = 0.
 */
__kernel void intersect (__global triangle_t *triangles,
						 __global ray_t *rays,
						 __global ocl_isect *results,
						 const unsigned int tcount,
						 const unsigned int rays_count
						 ) {
	const size_t idx = (get_global_id(2) * get_global_size(1) + get_global_id(1))
	                 * get_global_size(0) + get_global_id(0);

	/* Strictly-less-than: index rays_count is already one past the end. */
	if (idx >= rays_count) {
		return;
	}

	ocl_isect r;
	r.distance = 999999.0f;
	r.idx      = 0xFFFFFFFFu;
	/* Give u/v a defined value so a miss does not copy garbage to results. */
	r.u        = 0.0f;
	r.v        = 0.0f;

	const ray_t ray = rays[idx];

	for (unsigned int i = 0; i < tcount; i++) {
		const triangle_t t = triangles[i];
		isect_ray_triangle(i, &r, ray, t);
	}

	results[idx] = r;
}

With the new driver (195.17) and the SDK 3 beta everything works :)
I was still using the old 190.29 driver with the 2.3 sdk… :|

Is it possible to download it without having to be registered as an NVIDIA developer? I’m in a similar situation.

I found it here:

http://forums.nvidia.com/index.php?showtopic=149959 :)

Much better now, thanks a lot!