Hello,
I have a grey level image (resolution w x h) stored as “dev_im_l” on the device. I want to calculate average values of a 5x5 neighborhood around each pixel and store them in “dev_luminance”. My plan is to store each pixel plus its 24 neighbors in shared memory with each pixel having a block.
I’m new in Cuda, so even if I made beginner mistakes, I appreciate your help.
#define PATCH_SIZE 5
__global__ void compute_luminance(uchar *dev_im_l, float *dev_luminance, int w, int h)
{
__shared__ uchar patch[PATCH_SIZE*PATCH_SIZE];
int patch_index = blockIdx.x*blockDim.x + threadIdx.x;
int pix_index = threadIdx.x;
int y = (blockIdx.x*blockDim.x + threadIdx.x) / w;
int x = (blockIdx.x*blockDim.x + threadIdx.x) % w;
//center row of patch
int row = threadIdx.x / PATCH_SIZE;
if (y >= 2 && y < h - 2 && x >= 0 && x < w - 2)
{
if (threadIdx.x >= (PATCH_SIZE / 2)*PATCH_SIZE & threadIdx.x < (PATCH_SIZE / 2 + 1)*PATCH_SIZE)
patch[pix_index] = dev_im_l[patch_index];
else
patch[pix_index] = dev_im_l[patch_index + w*(row - PATCH_SIZE / 2)];
}
__syncthreads();
dev_luminance[patch_index] = 0;
for (int i = 0; i < PATCH_SIZE*PATCH_SIZE; i++)
dev_luminance[patch_index] += (float)((int)patch[i]);
dev_luminance[patch_index] /= (float)(PATCH_SIZE*PATCH_SIZE);}
int main()
{
...
compute_luminance << <(w*h + PATCH_SIZE*PATCH_SIZE - 1) / (PATCH_SIZE*PATCH_SIZE), PATCH_SIZE*PATCH_SIZE >> >(dev_im_l, dev_im_r, dev_luminance, dev_contrast, dev_d, w, h);
...
return 0;
}