Hi,
I’m having the same performance issue with my two programs. One of the programs is a bit more complicated with multiple kernels running in a loop, while the other one is just some writes, a kernel execution followed by some reads.
The problem is that both programs run as much as 10 times slower than normal when nothing else is using the GPU. It sounds weird, but I need to play a movie or browse a site with a Flash ad or something like that to get the best performance :)
OS: WinXP
Graphics Card: GeForce 8800 GTS
Driver: 195.39
On Windows 7 the performance is stable and good, I don’t know if that’s just because screen rendering is hardware accelerated there or if it’s something else.
Using the OpenCL profiler doesn’t reveal much: the GPU times are the same, while the CPU times vary a lot and are much higher.
Below is the part of the code that executes the simpler kernel.
/*
 * Host-side sequence for one kernel run:
 *   1. enqueue five non-blocking writes (three vertex arrays, two transforms),
 *   2. enqueue the kernel and wait for the queue to drain,
 *   3. enqueue three reads of the results, the last one blocking.
 *
 * Error codes from consecutive enqueue calls are OR-ed together. Because
 * CL_SUCCESS is 0 this reliably detects "at least one call failed", but the
 * combined value is not itself a meaningful OpenCL error code.
 */

/* The writes are non-blocking (CL_FALSE), so the host arrays must remain
 * valid and unmodified until the queue has consumed them. */
ciErr1 = clEnqueueWriteBuffer(cqCommandQueue, cmDevVertices0, CL_FALSE, 0, sizeof(cl_float4) * szGlobalWorkSize, vertices0, 0, NULL, NULL);
ciErr1 |= clEnqueueWriteBuffer(cqCommandQueue, cmDevVertices1, CL_FALSE, 0, sizeof(cl_float4) * szGlobalWorkSize, vertices1, 0, NULL, NULL);
ciErr1 |= clEnqueueWriteBuffer(cqCommandQueue, cmDevVertices2, CL_FALSE, 0, sizeof(cl_float4) * szGlobalWorkSize, vertices2, 0, NULL, NULL);
/* NOTE(review): clTransform1/clTransform2 are uploaded to cmDevTransform0/
 * cmDevTransform1 respectively; the index offset looks intentional but
 * should be confirmed against the kernel's argument order. */
ciErr1 |= clEnqueueWriteBuffer(cqCommandQueue, cmDevTransform0, CL_FALSE, 0, sizeof(cl_float16), &clTransform1, 0, NULL, NULL);
ciErr1 |= clEnqueueWriteBuffer(cqCommandQueue, cmDevTransform1, CL_FALSE, 0, sizeof(cl_float16), &clTransform2, 0, NULL, NULL);
if (ciErr1 != CL_SUCCESS)
{
    printf("Error in clEnqueueWriteBuffer, Line %u in file %s !!!\n\n", __LINE__, __FILE__);
    //Cleanup(EXIT_FAILURE);
    exit(1);
}

/* Launch the kernel over a 1-D range of szGlobalWorkSize work-items. */
const size_t localWorkSize = LOCAL_WORK_SIZE;
ciErr1 = clEnqueueNDRangeKernel(cqCommandQueue, ckKernel, 1, NULL, &szGlobalWorkSize, &localWorkSize, 0, NULL, NULL);
/* Check the launch result before synchronizing: there is no point in
 * blocking on the queue if the kernel was never enqueued. */
if (ciErr1 != CL_SUCCESS)
{
    printf("Error in clEnqueueNDRangeKernel, Line %u in file %s !!!\n\n", __LINE__, __FILE__);
    //Cleanup(EXIT_FAILURE);
    exit(1);
}

/* Wait for the writes and the kernel to complete. clFinish reports its own
 * error code, which must not be discarded. */
ciErr1 = clFinish(cqCommandQueue);
if (ciErr1 != CL_SUCCESS)
{
    printf("Error in clFinish, Line %u in file %s !!!\n\n", __LINE__, __FILE__);
    //Cleanup(EXIT_FAILURE);
    exit(1);
}

/* Read the results back. The first two reads are non-blocking; the last one
 * is blocking (CL_TRUE).
 * NOTE(review): the blocking read only guarantees completion of the two
 * earlier reads if cqCommandQueue is an in-order queue - confirm how the
 * queue was created before using contactPoints/contactNormals. */
ciErr1 = clEnqueueReadBuffer(cqCommandQueue, cmDevResultPoints, CL_FALSE, 0, sizeof(cl_float4) * szGlobalWorkSize, contactPoints, 0, NULL, NULL);
ciErr1 |= clEnqueueReadBuffer(cqCommandQueue, cmDevResultNormals, CL_FALSE, 0, sizeof(cl_float4) * szGlobalWorkSize, contactNormals, 0, NULL, NULL);
ciErr1 |= clEnqueueReadBuffer(cqCommandQueue, cmDevResultDepths, CL_TRUE, 0, sizeof(cl_float) * szGlobalWorkSize, contactDepths, 0, NULL, NULL);
if (ciErr1 != CL_SUCCESS)
{
    printf("Error in clEnqueueReadBuffer, Line %u in file %s !!!\n\n", __LINE__, __FILE__);
    //Cleanup(EXIT_FAILURE);
    exit(1);
}