Sobel Filter ith shared memory

Hi ,

I have a problem with this code(Sobel Filter):

__global__ void SobelFilter(unsigned char* g_DataIn, unsigned char* g_DataOut, int width, int height)

{

   __shared__ unsigned char sharedMem[BLOCK_HEIGHT * BLOCK_WIDTH];

   float s_SobelMatrix[9];

s_SobelMatrix[0] = -1;

    s_SobelMatrix[1] = 0;

    s_SobelMatrix[2] = 1;

s_SobelMatrix[3] = -2;

    s_SobelMatrix[4] = 0;

    s_SobelMatrix[5] = 2;

s_SobelMatrix[6] = -1;

    s_SobelMatrix[7] = 0;

    s_SobelMatrix[8] = 1;

// Computer the X and Y global coordinates

   int x = blockIdx.x * TILE_WIDTH + threadIdx.x ;//- FILTER_RADIUS;

   int y = blockIdx.y * TILE_HEIGHT + threadIdx.y ;//- FILTER_RADIUS;

// Get the Global index into the original image

   int index = y * (width) + x;

// Perform the first load of values into shared memory

   int sharedIndex = threadIdx.y * blockDim.y + threadIdx.x;

   sharedMem[sharedIndex] = g_DataIn[index];

   __syncthreads();

int i, j, rows, cols, startCol, endCol, startRow, endRow;

rows = height;

cols = width;

startCol = 1;

endCol = cols - 1;

startRow = 1;

endRow = rows - 1;

// Go through all inner pixel positions

for(i=startRow; i<endRow; i++) {

for(j=startCol; j<endCol; j++) {

// sum up the 9 values to calculate both the direction x and direction y

float sumX = 0, sumY=0;

for(int dy = -FILTER_RADIUS; dy <= FILTER_RADIUS; dy++) {

for(int dx = -FILTER_RADIUS; dx <= FILTER_RADIUS; dx++) {

float Pixel = (float)(sharedMem[i*width + j + (dy * width + dx)]); 	

sumX += Pixel * s_SobelMatrix[(dy + FILTER_RADIUS) * FILTER_DIAMETER +

(dx+FILTER_RADIUS)];

sumY += Pixel * s_SobelMatrix[(dx + FILTER_RADIUS) * FILTER_DIAMETER +

(dy+FILTER_RADIUS)];

}

}

g_DataOut[i*width + j] = (abs(sumX) + abs(sumY)) > EDGE_VALUE_THRESHOLD ? 255

: 0;

}

}

}

when I use shared memory,it doesn’t work(output image is a black image)

if I use DataIn (instead of shared memory), it generates the correct image

Can anyone tell me what’s wrong with shared memory here?

thanks

Any suggestions?

I notice you use BLOCK_HEIGHT and BLOCK_WIDTH in your shared memory declaration, while later TILE_WIDTH and TILE_HEIGHT are used for apparently the same purpose. Could this be related to the problem?

Do you check error codes to make sure the kernel is executed correctly?

It was solved.

I used “sharedIndex” instead of indexes for s_Sobel (“(dx + FILTER_RADIUS)” and “(dy + FILTER_RADIUS)”)

float Pixel = (float)(sharedMem[sharedIndex + (dy * blockDim.x + dx)]);

I also shouldn’t have wrote 2 “for” loops (variables “i” and “j”) and should have wrote it just to satisfy the limits

if((threadIdx.x >= FILTER_RADIUS) && (threadIdx.x < (BLOCK_WIDTH - FILTER_RADIUS))&&(threadIdx.y >= FILTER_RADIUS) && (threadIdx.y < (BLOCK_HEIGHT - FILTER_RADIUS)))

The SDK sample sobelFilter also implements Sobel.