Hi,
I m facing a very strange problem here.
I am doing bayerpattern interpolation, for which I use a 2 dimensional one channel texture. This texture may have arbitrary size.
Everything works out fine, but when I use a width that is not dividible by 16, my whole image gets somehow distorted (skewed, pattern locations change). I think my texture is delivering wrong values, but I’m not sure.
Has anyone ever seen anything like this? I have been hunting this bug for two days now and I’m running out of ideas.
I have attached two results of a kernel run. The first one is 640x480 (baypatt1normal) and runs fine, the seconf one (baypatt1distorted) is 627x437 and shows very strange artifacts.
Regards,
Kwyjibo
#define BLOCK_SIDE_LENGTH 16
//Texture reference to work represent image data
texture<uchar1, 2, cudaReadModeElementType> bayTex;
//Enum represents the location in the bayer grid
enum bayerLocation
{
RED = 0,
BLUE = 1,
GREENINBLUEROW = 2,
GREENINREDROW = 3
};
//Returns the location
__device__ inline bayerLocation getBayerLocation(unsigned int x, unsigned int y)
{
if ((x % 2 == 0) && (y % 2) == 0) return GREENINREDROW;
if ((x % 2 == 1) && (y % 2) == 0) return RED;
if ((x % 2 == 0) && (y % 2) == 1) return BLUE;
return GREENINBLUEROW;
}
//Conversion Bayer pattern to BGR bilinear
__global__ void BayerGRBGRBilinearKernel(cudaImage8UDataDescriptor source, cudaImage8UDataDescriptor target)
{
//Define some variables
unsigned char* currentr;
unsigned char* currentg;
unsigned char* currentb;
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
currentb = (unsigned char*) ((char*) target.d_pointer + y * target.d_pitch) + (x * target.d_padding);
currentg = (unsigned char*) ((char*) target.d_pointer + y * target.d_pitch) + (x * target.d_padding) + 1;
currentr = (unsigned char*) ((char*) target.d_pointer + y * target.d_pitch) + (x * target.d_padding) + 2;
if (x < source.d_width && y < source.d_height)
{
//Get bayer location
bayerLocation bayloc;
bayloc = getBayerLocation(x, y);
//Switch bayer location to interpolate colors for the different cases
switch (bayloc)
{
//Red location
case RED:
*currentr = tex2D(bayTex, x , y ).x;
*currentg = (tex2D(bayTex, x-1, y ).x + \
tex2D(bayTex, x+1, y ).x + \
tex2D(bayTex, x , y+1).x + \
tex2D(bayTex, x , y-1).x) / 4;
*currentb = (tex2D(bayTex, x-1, y-1).x + \
tex2D(bayTex, x+1, y+1).x + \
tex2D(bayTex, x-1, y+1).x + \
tex2D(bayTex, x+1, y-1).x) / 4;
break;
//Blue location
case BLUE:
*currentr = (tex2D(bayTex, x-1, y-1).x + \
tex2D(bayTex, x+1, y+1).x + \
tex2D(bayTex, x-1, y+1).x + \
tex2D(bayTex, x+1, y-1).x) / 4;
*currentg = (tex2D(bayTex, x-1, y ).x + \
tex2D(bayTex, x+1, y ).x + \
tex2D(bayTex, x , y+1).x + \
tex2D(bayTex, x , y-1).x) / 4;
*currentb = tex2D(bayTex, x , y ).x;
break;
//Green in blue row
case GREENINBLUEROW:
*currentr = (tex2D(bayTex, x , y+1).x + \
tex2D(bayTex, x , y-1).x) / 2;
*currentg = tex2D(bayTex, x , y ).x;
*currentb = (tex2D(bayTex, x+1, y ).x + \
tex2D(bayTex, x-1, y ).x) / 2;
break;
//Green in red row
case GREENINREDROW:
*currentr = (tex2D(bayTex, x+1, y ).x + \
tex2D(bayTex, x-1, y ).x) / 2;
*currentg = tex2D(bayTex, x , y ).x;
*currentb = (tex2D(bayTex, x , y+1).x + \
tex2D(bayTex, x , y-1).x) / 2;
break;
}
}
}
//Wrapper function for Kernel
void BayerGRBGRBilinear(const cudaImage8UDataDescriptor* source, const cudaImage8UDataDescriptor* target)
{
//Bind Texture to reference
const textureReference* bayTexPtr;
cudaGetTextureReference(&bayTexPtr, "bayTex");
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<uchar1>();
//Set acces parameters
bayTex.addressMode[0] = cudaAddressModeClamp;
bayTex.addressMode[1] = cudaAddressModeClamp;
//Bind pitched data to texture
unsigned int width_px = source->d_width/source->d_padding;
cudaBindTexture2D(NULL, bayTexPtr, source->d_pointer, &channelDesc, width_px, source->d_height, source->d_pitch);
//Invoke the kernel
dim3 threadsPerBlock(BLOCK_SIDE_LENGTH, BLOCK_SIDE_LENGTH);
//Calculate number of blocks
unsigned int blocksx = (width_px + BLOCK_SIDE_LENGTH - 1) / BLOCK_SIDE_LENGTH;
unsigned int blocksy = (source->d_height + BLOCK_SIDE_LENGTH - 1) / BLOCK_SIDE_LENGTH;
dim3 numBlocks(blocksx, blocksy);
//Start Kernel
BayerGRBGRBilinearKernel<<<numBlocks, threadsPerBlock>>>(*source, *target);
}