Hi !
I am developing a 3×3 2D median filter; the code is given below. I thought I had handled the image edges, but apparently not correctly, because the output image has black bars.
However, I cannot see what is wrong with the way my kernel is written…
// Thread-block dimensions. The kernel sizes its shared-memory tile from these
// and compares threadIdx against them to detect block edges, so it must be
// launched with blockDim = (BLOCK_X, BLOCK_Y).
#define BLOCK_X 16
#define BLOCK_Y 16
// Texture reference holding the 8-bit input image.
// NOTE(review): presumably sampled through the IN(x, y) macro used by the
// kernel; that macro is not defined in this excerpt -- confirm.
texture<unsigned char> imgTex;
// 3x3 median filter: each thread produces one output pixel of an nx-by-ny
// 8-bit image. Pixels outside the image are treated as 0 (zero padding).
//
// Launch layout : blockDim must be (BLOCK_X, BLOCK_Y). The grid must cover the
//                 image and is allowed to overhang it; threads that fall
//                 outside the image contribute zeros to the tile and store
//                 nothing.
// Shared memory : one static (BLOCK_X + 2) x (BLOCK_Y + 2) byte tile, i.e. the
//                 block's pixels plus a one-pixel halo on every side.
//
// d_out : output image, row-major, nx * ny bytes, written at d_out[y * nx + x]
// nx    : image width in pixels
// ny    : image height in pixels
//
// NOTE(review): SMEM(i, j) and IN(i, j) are macros defined outside this
// excerpt. This code assumes SMEM(i, j) addresses smem with a +1 offset per
// axis (so i, j in [-1, BLOCK] are valid) and IN(i, j) fetches input pixel
// (i, j) -- confirm against their definitions.
__global__ void Kernel(unsigned char *d_out, int nx, int ny)
{
    const int tx = threadIdx.x;
    const int ty = threadIdx.y;

    // Block-edge flags: threads on the rim of the block own the halo cells
    // next to them (and the corner threads own the halo corners).
    bool is_x_top = (tx == 0), is_x_bot = (tx == BLOCK_X - 1);
    bool is_y_top = (ty == 0), is_y_bot = (ty == BLOCK_Y - 1);

    __shared__ unsigned char smem[BLOCK_X + 2][BLOCK_Y + 2];

    // Zero the halo first. The same thread that zeroes a halo cell is the one
    // that may overwrite it with image data below, so no barrier is needed
    // between the two phases.
    if (is_x_top)      SMEM(tx - 1, ty) = 0;
    else if (is_x_bot) SMEM(tx + 1, ty) = 0;
    if (is_y_top)
    {
        SMEM(tx, ty - 1) = 0;
        if (is_x_top)      SMEM(tx - 1, ty - 1) = 0;
        else if (is_x_bot) SMEM(tx + 1, ty - 1) = 0;
    }
    else if (is_y_bot)
    {
        SMEM(tx, ty + 1) = 0;
        if (is_x_top)      SMEM(tx - 1, ty + 1) = 0;
        else if (is_x_bot) SMEM(tx + 1, ty + 1) = 0;
    }

    const int x = blockIdx.x * blockDim.x + tx;
    const int y = blockIdx.y * blockDim.y + ty;

    // True only for threads that map to a real pixel. When nx or ny is not a
    // multiple of the block size, the last blocks contain threads that do not.
    const bool inside = (x < nx) && (y < ny);

    // A halo cell is fetched only if the fetching thread is itself inside the
    // image AND the neighbour it fetches is inside the image. Requiring
    // `inside` here keeps both coordinates of every IN() access in range.
    is_x_top = is_x_top && inside && (x > 0);
    is_x_bot = is_x_bot && inside && (x < (nx - 1));
    is_y_top = is_y_top && inside && (y > 0);
    is_y_bot = is_y_bot && inside && (y < (ny - 1));

    // Each thread pulls in its own pixel; threads outside the image store 0 so
    // that in-image neighbours see zero padding rather than stray data.
    if (inside) SMEM(tx, ty) = IN(x, y);
    else        SMEM(tx, ty) = 0;

    if (is_x_top)      SMEM(tx - 1, ty) = IN(x - 1, y);
    else if (is_x_bot) SMEM(tx + 1, ty) = IN(x + 1, y);
    if (is_y_top)
    {
        SMEM(tx, ty - 1) = IN(x, y - 1);
        if (is_x_top)      SMEM(tx - 1, ty - 1) = IN(x - 1, y - 1);
        else if (is_x_bot) SMEM(tx + 1, ty - 1) = IN(x + 1, y - 1);
    }
    else if (is_y_bot)
    {
        SMEM(tx, ty + 1) = IN(x, y + 1);
        if (is_x_top)      SMEM(tx - 1, ty + 1) = IN(x - 1, y + 1);
        else if (is_x_bot) SMEM(tx + 1, ty + 1) = IN(x + 1, y + 1);
    }

    // Every thread of the block must reach this barrier, including the ones
    // outside the image, so the early exit comes only after it.
    __syncthreads();

    if (!inside) return;

    // Gather the 3x3 neighbourhood of this thread's pixel.
    unsigned char v[9] = { SMEM(tx - 1, ty - 1), SMEM(tx, ty - 1), SMEM(tx + 1, ty - 1),
                           SMEM(tx - 1, ty    ), SMEM(tx, ty    ), SMEM(tx + 1, ty    ),
                           SMEM(tx - 1, ty + 1), SMEM(tx, ty + 1), SMEM(tx + 1, ty + 1) };

    // Partial selection sort: pass i leaves the i-th smallest value in v[i].
    // After passes 0..4, v[4] is the median, so the remaining passes of a full
    // sort are unnecessary.
    for (int i = 0; i < 5; i++)
    {
        for (int j = i + 1; j < 9; j++)
        {
            if (v[i] > v[j])
            {
                unsigned char tmp = v[i];
                v[i] = v[j];
                v[j] = tmp;
            }
        }
    }

    // size_t arithmetic avoids int overflow of y * nx on very large images.
    d_out[(size_t)y * nx + x] = v[4];
}
The texture is 1D and is bound to linear memory.