Hi ,
I have a problem with this code(Sobel Filter):
__global__ void SobelFilter(unsigned char* g_DataIn, unsigned char* g_DataOut, int width, int height)
{
__shared__ unsigned char sharedMem[BLOCK_HEIGHT * BLOCK_WIDTH];
float s_SobelMatrix[9];
s_SobelMatrix[0] = -1;
s_SobelMatrix[1] = 0;
s_SobelMatrix[2] = 1;
s_SobelMatrix[3] = -2;
s_SobelMatrix[4] = 0;
s_SobelMatrix[5] = 2;
s_SobelMatrix[6] = -1;
s_SobelMatrix[7] = 0;
s_SobelMatrix[8] = 1;
// Computer the X and Y global coordinates
int x = blockIdx.x * TILE_WIDTH + threadIdx.x ;//- FILTER_RADIUS;
int y = blockIdx.y * TILE_HEIGHT + threadIdx.y ;//- FILTER_RADIUS;
// Get the Global index into the original image
int index = y * (width) + x;
// Perform the first load of values into shared memory
int sharedIndex = threadIdx.y * blockDim.y + threadIdx.x;
sharedMem[sharedIndex] = g_DataIn[index];
__syncthreads();
int i, j, rows, cols, startCol, endCol, startRow, endRow;
rows = height;
cols = width;
startCol = 1;
endCol = cols - 1;
startRow = 1;
endRow = rows - 1;
// Go through all inner pixel positions
for(i=startRow; i<endRow; i++) {
for(j=startCol; j<endCol; j++) {
// sum up the 9 values to calculate both the direction x and direction y
float sumX = 0, sumY=0;
for(int dy = -FILTER_RADIUS; dy <= FILTER_RADIUS; dy++) {
for(int dx = -FILTER_RADIUS; dx <= FILTER_RADIUS; dx++) {
float Pixel = (float)(sharedMem[i*width + j + (dy * width + dx)]);
sumX += Pixel * s_SobelMatrix[(dy + FILTER_RADIUS) * FILTER_DIAMETER +
(dx+FILTER_RADIUS)];
sumY += Pixel * s_SobelMatrix[(dx + FILTER_RADIUS) * FILTER_DIAMETER +
(dy+FILTER_RADIUS)];
}
}
g_DataOut[i*width + j] = (abs(sumX) + abs(sumY)) > EDGE_VALUE_THRESHOLD ? 255
: 0;
}
}
}
when I use shared memory,it doesn’t work(output image is a black image)
if I use DataIn (instead of shared memory), it generates the correct image
Can anyone tell me what’s wrong with shared memory here?
thanks