Hi,
I have a problem with a simple code.
I will implement a Sobel Filter, but the destination buffer is the same of the source…
The program run without errors
Here is my code… please, someone explain me the error… :">
The code runs very well in debug mode…
(Excuse me for my English :"> )
void CUDA_SimpleSobel(unsigned char* src, unsigned char* dst, int size)
{
//load src1 to the device
unsigned char* src1D;
cudaError_t s = cudaMalloc((void**)&src1D, size);
if (s != cudaSuccess)
{
printf("Error allocating Sobel src\n");
}
s = cudaMemcpy(src1D, src, size, cudaMemcpyHostToDevice);
if (s != cudaSuccess)
{
printf("Error copying Sobel src\n");
}
// Allocate destination to the device memory
unsigned char* dstD;
s = cudaMalloc((void**)&dstD, size);
if (s != cudaSuccess)
{
printf("Error allocating Sobel dst\n");
}
dim3 dimBlock(LINE_SIZE);
dim3 dimGrid(size / dimBlock.x);
//cudaEventRecord(start, 0);
//CUDA_SobelImpl<<<dimGrid, dimBlock>>>(src1D, dstD);
cudaMemcpy(dst, dstD, size, cudaMemcpyDeviceToHost);
//cudaEventRecord(stop, 0);
// Free device memory
cudaFree(src1D);
cudaFree(dstD);
}
__global__ void CUDA_SobelImpl(unsigned char* src, unsigned char* dst)
{
//Block index
int bx = blockIdx.x;
// Thread index
int tx = threadIdx.x;
dst[tx + (bx + 1) * LINE_SIZE] = 0;
return;
if (bx >= 144 - 1)
{
return;
}
unsigned char* line1;
unsigned char* line2;
unsigned char* line3;
line1 = src + tx + bx * LINE_SIZE;
line2 = src + tx + (bx + 1) * LINE_SIZE;
line3 = src + tx + (bx + 2) * LINE_SIZE;
// if (tx < LINE_SIZE - 1)
dst[tx + (bx + 1) * LINE_SIZE] = ComputeSobel(line1[0], line1[1], line1[2],
line2[0], line2[1], line2[2],
line3[0], line3[1], line3[2]);
}
__device__ unsigned char ComputeSobel(unsigned char ul, // upper left
unsigned char um, // upper middle
unsigned char ur, // upper right
unsigned char ml, // middle left
unsigned char mm, // middle (unused)
unsigned char mr, // middle right
unsigned char ll, // lower left
unsigned char lm, // lower middle
unsigned char lr)
{
short horz = ur + 2*mr + lr - ul - 2*ml - ll;
short vert = ul + 2*um + ur - ll - 2*lm - lr;
short sum = (short)(abs(horz) + abs(vert)) / 2;
sum = (sum > 0xFF) ? 0xFF : sum;
return (unsigned char)sum;
}