Hi,
I’m a newbie to OpenCL development. I have a simple convolution kernel for a 24-bit RGB image:
/*
 * ConvolveRGB - 3x3 weighted-average filter over an interleaved image with
 * three bytes per pixel.
 *
 * Each work-item handles the pixel at (get_global_id(0), get_global_id(1)).
 * For every one of the three byte channels it sums the 3x3 neighbourhood
 * with the weights
 *
 *     1 2 1
 *     2 4 2
 *     1 2 1
 *
 * divides by 16 and stores the result at the same offset in `out`.
 *
 * in     - source bytes, indexed as (y * width + x) * 3 + channel
 * out    - destination bytes, same indexing as `in`
 * width  - image width in pixels
 * height - image height in pixels
 *
 * Work-items on the outermost rows/columns, or outside the image, write
 * nothing, so those bytes of `out` are left untouched by this kernel.
 */
__kernel void ConvolveRGB(const global uchar *in,
                          global uchar *out,
                          int width, int height)
{
    const int px = get_global_id(0);
    const int py = get_global_id(1);
    const int stride = width * 3; /* bytes from one row to the next */

    /* Only interior pixels have a complete 3x3 neighbourhood. */
    if (px <= 0 || px >= width - 1 || py <= 0 || py >= height - 1)
        return;

    const int first = (py * width + px) * 3; /* offset of channel 0 */

    for (int ch = 0; ch < 3; ++ch) {
        const int mid = first + ch;
        const int above = mid - stride;
        const int below = mid + stride;

        /* Group the taps by weight: corners x1, edges x2, centre x4. */
        const int corners = in[above - 3] + in[above + 3]
                          + in[below - 3] + in[below + 3];
        const int edges = in[above] + in[mid - 3]
                        + in[mid + 3] + in[below];

        int acc = (corners + 2 * edges + 4 * in[mid]) / 16;
        acc = (acc > 255) ? 255 : acc;
        out[mid] = acc;
    }
}
And the calling code looks like this:
void ProcessRGBOnGPU()
{
size_t g_work_size = {IMAGE_WIDTH, IMAGE_HEIGHT};
size_t l_work_size = {16, 16};
cl_event events[1];
clEnqueueWriteBuffer(CLQueue, CLRGBInBuf, false, 0, IMAGE_WIDTH * IMAGE_HEIGHT * 3, RGBIn, 0, 0, 0);
clEnqueueNDRangeKernel(CLQueue, CLKnlRGB, 2, 0,
g_work_size,
l_work_size,
0, 0, events);
clEnqueueReadBuffer(CLQueue, CLRGBOutBuf, true, 0, IMAGE_WIDTH * IMAGE_HEIGHT * 3,
RGBOutGPU, 0, 0, 0);
}
The problem is that the same program, running on my same MacBook Pro (9400M/9600M GT) notebook with a 2048*2048 image, needs about 5ms, but on Windows XP it takes about 250ms! Why is the performance so different?
Any help is really appreciated. Thanks in advance.
ZhaoYu