I got this problem only if I call clEnqueueWriteBuffer after running the first kernel.
The following code works fine with the ATI SDK on an ATI card.
I checked the OpenCL specification and I couldn’t find any information about the
CL_OUT_OF_RESOURCES error code for clEnqueueWriteBuffer.
Please help. Thanks.
Here is my first kernel:
float4 mulMat4Vec4(const float16 mat, const float4 vec)
{
float4 ret;
//float4 c1 = mat.s0123 * vec.s0;
//float4 c2 = mat.s4567 * vec.s1;
//float4 c3 = mat.s89ab * vec.s2;
//float4 c4 = mat.scdef * vec.s3;
//ret.s0 = c1.s0 + c2.s0 + c3.s0 + c4.s0;
//ret.s1 = c1.s1 + c2.s1 + c3.s1 + c4.s1;
//ret.s2 = c1.s2 + c2.s2 + c3.s2 + c4.s2;
//ret.s3 = c1.s3 + c2.s3 + c3.s3 + c4.s3;
ret.s0 = mat.s0 * vec.s0 + mat.s4 * vec.s1 + mat.s8 * vec.s2 + mat.sc * vec.s3;
ret.s1 = mat.s1 * vec.s0 + mat.s5 * vec.s1 + mat.s9 * vec.s2 + mat.sd * vec.s3;
ret.s2 = mat.s2 * vec.s0 + mat.s6 * vec.s1 + mat.sa * vec.s2 + mat.se * vec.s3;
ret.s3 = mat.s3 * vec.s0 + mat.s7 * vec.s1 + mat.sb * vec.s2 + mat.sf * vec.s3;
return ret;
}
/*
 * Transforms every primitive by the matrix of the object it belongs to.
 *
 * Each work-item handles the primitive at its global id in dimension 0:
 * the positions of the three vertices go through mulMat4Vec4, the normals
 * through mulMat4Vec3 (defined outside this excerpt), and objectIndex and
 * primitiveType are copied through unchanged.
 *
 * objects:       per-object data; only `transformation` is read here.
 * numPrimitives: number of valid entries in `primitives`.
 * primitives:    input primitives.
 * outPrimitives: output primitives, written at the same index.
 */
__kernel void transformKernel(__global RTObject* objects,
                              const uint numPrimitives,
                              __global RTPrimitive* primitives,
                              __global RTPrimitive* outPrimitives)
{
    const size_t gid = get_global_id(0);

    /* Work-items whose id is not below numPrimitives do nothing. */
    if (gid < numPrimitives)
    {
        __global const RTPrimitive* src = primitives + gid;
        __global RTPrimitive* dst = outPrimitives + gid;

        /* Fetch the inputs into private copies before writing anything. */
        const uint objectID = src->objectIndex;
        const RTVertex a = src->vertexA;
        const RTVertex b = src->vertexB;
        const RTVertex c = src->vertexC;
        const float16 xform = objects[objectID].transformation;

        /* Positions */
        dst->vertexA.position = mulMat4Vec4(xform, a.position);
        dst->vertexB.position = mulMat4Vec4(xform, b.position);
        dst->vertexC.position = mulMat4Vec4(xform, c.position);

        /* Normals */
        dst->vertexA.normal = mulMat4Vec3(xform, a.normal);
        dst->vertexB.normal = mulMat4Vec3(xform, b.normal);
        dst->vertexC.normal = mulMat4Vec3(xform, c.normal);

        /* Bookkeeping fields are passed through unchanged. */
        dst->objectIndex = objectID;
        dst->primitiveType = src->primitiveType;
    }
}
And here is the host code that has the problem:
//enqueue the first kernel
cl_uint iParam = 0;
cl_uint err = clSetKernelArg(m_knlTransform, iParam++, sizeof(cl_mem), &m_memObjects);
err = clSetKernelArg(m_knlTransform, iParam++, sizeof(cl_uint), &m_iNumPrimitives);
err = clSetKernelArg(m_knlTransform, iParam++, sizeof(cl_mem), &m_primitiveBuffer);
err = clSetKernelArg(m_knlTransform, iParam++, sizeof(cl_mem), &m_memPrimitives);
workDim = 1;
globalSize[0] = (m_iNumPrimitives/128+1)*128;
workSize[0] = 128;
workDone = m_pDevice->EnqueueNDRangeKernel(m_knlTransform, workDim, globalSize, workSize);
clWaitForEvents(1, &workDone);
clReleaseEvent(workDone);
err = clEnqueueWriteBuffer(m_pDevice->GetCommandQueue(), m_memCamera, CL_TRUE, 0, camSize, &m_rtCam, 0, NULL, NULL);
This is the code that creates the buffers:
m_primitiveBuffer = clCreateBuffer(m_pDevice->GetContext(), CL_MEM_READ_WRITE, 1024*sizeof(RTPrimitive), NULL, &err);
m_memPrimitives = clCreateBuffer(m_pDevice->GetContext(), CL_MEM_READ_WRITE, 1024*sizeof(RTPrimitive), NULL, &err);