Texture Reading Problem

Hi,

I m facing a very strange problem here.

I am doing bayerpattern interpolation, for which I use a 2 dimensional one channel texture. This texture may have arbitrary size.

Everything works out fine, but when I use a width that is not dividible by 16, my whole image gets somehow distorted (skewed, pattern locations change). I think my texture is delivering wrong values, but I’m not sure.

Has anyone ever seen anything like this? I have been hunting this bug for two days now and I’m running out of ideas.

I have attached two results of a kernel run. The first one is 640x480 (baypatt1normal) and runs fine, the seconf one (baypatt1distorted) is 627x437 and shows very strange artifacts.

Regards,

Kwyjibo

#define BLOCK_SIDE_LENGTH	16

//Texture reference to work represent image data

texture<uchar1, 2, cudaReadModeElementType> bayTex;

//Enum represents the location in the bayer grid

enum bayerLocation

{

	RED             = 0,

	BLUE		= 1,

	GREENINBLUEROW	= 2,

	GREENINREDROW	= 3

};

//Returns the location 

__device__ inline bayerLocation getBayerLocation(unsigned int x, unsigned int y)

{

	if ((x % 2 == 0) && (y % 2) == 0) return GREENINREDROW;

	if ((x % 2 == 1) && (y % 2) == 0) return RED;

	if ((x % 2 == 0) && (y % 2) == 1) return BLUE;

					  return GREENINBLUEROW;

}

//Conversion Bayer pattern to BGR bilinear

__global__ void BayerGRBGRBilinearKernel(cudaImage8UDataDescriptor source, cudaImage8UDataDescriptor target)

{

	//Define some variables

	unsigned char* currentr;

	unsigned char* currentg;

	unsigned char* currentb;

	unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;

	unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

	currentb = (unsigned char*) ((char*) target.d_pointer + y * target.d_pitch) + (x * target.d_padding);

	currentg = (unsigned char*) ((char*) target.d_pointer + y * target.d_pitch) + (x * target.d_padding) + 1;

	currentr = (unsigned char*) ((char*) target.d_pointer + y * target.d_pitch) + (x * target.d_padding) + 2;

	if (x < source.d_width && y < source.d_height)

	{

		//Get bayer location

		bayerLocation bayloc;

		bayloc = getBayerLocation(x, y);

		//Switch bayer location to interpolate colors for the different cases

		switch (bayloc)

		{

			//Red location

			case RED:

				*currentr =  tex2D(bayTex, x  , y  ).x;

				*currentg = (tex2D(bayTex, x-1, y  ).x + \

							 tex2D(bayTex, x+1, y  ).x + \

							 tex2D(bayTex, x  , y+1).x + \

							 tex2D(bayTex, x  , y-1).x)  / 4;

				*currentb = (tex2D(bayTex, x-1, y-1).x + \

							 tex2D(bayTex, x+1, y+1).x + \

							 tex2D(bayTex, x-1, y+1).x + \

							 tex2D(bayTex, x+1, y-1).x)  / 4;

				break;

			//Blue location

			case BLUE:

				*currentr = (tex2D(bayTex, x-1, y-1).x + \

							 tex2D(bayTex, x+1, y+1).x + \

							 tex2D(bayTex, x-1, y+1).x + \

							 tex2D(bayTex, x+1, y-1).x)  / 4;

				*currentg = (tex2D(bayTex, x-1, y  ).x + \

							 tex2D(bayTex, x+1, y  ).x + \

							 tex2D(bayTex, x  , y+1).x + \

							 tex2D(bayTex, x  , y-1).x)  / 4;

				*currentb =  tex2D(bayTex, x  , y  ).x;

				break;

			//Green in blue row

			case GREENINBLUEROW:

				*currentr = (tex2D(bayTex, x  , y+1).x + \

							 tex2D(bayTex, x  , y-1).x)  / 2;

				*currentg =  tex2D(bayTex, x  , y  ).x;

				*currentb = (tex2D(bayTex, x+1, y  ).x + \

							 tex2D(bayTex, x-1, y  ).x)  / 2;

				break;

			//Green in red row

			case GREENINREDROW:

				*currentr = (tex2D(bayTex, x+1, y  ).x + \

							 tex2D(bayTex, x-1, y  ).x)  / 2;

				*currentg =  tex2D(bayTex, x  , y  ).x;

				*currentb = (tex2D(bayTex, x  , y+1).x + \

							 tex2D(bayTex, x  , y-1).x)  / 2;

				break;

		}

	}

}

//Wrapper function for Kernel

void BayerGRBGRBilinear(const cudaImage8UDataDescriptor* source, const cudaImage8UDataDescriptor* target)

{

	//Bind Texture to reference

	const textureReference* bayTexPtr;

	cudaGetTextureReference(&bayTexPtr, "bayTex");

	cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<uchar1>();

	//Set acces parameters

	bayTex.addressMode[0] = cudaAddressModeClamp;

	bayTex.addressMode[1] = cudaAddressModeClamp;

	//Bind pitched data to texture

	unsigned int width_px = source->d_width/source->d_padding;

	cudaBindTexture2D(NULL, bayTexPtr, source->d_pointer, &channelDesc, width_px, source->d_height, source->d_pitch);

	//Invoke the kernel

	dim3 threadsPerBlock(BLOCK_SIDE_LENGTH, BLOCK_SIDE_LENGTH);

	//Calculate number of blocks

	unsigned int blocksx = (width_px		+ BLOCK_SIDE_LENGTH - 1) / BLOCK_SIDE_LENGTH;

	unsigned int blocksy = (source->d_height	+ BLOCK_SIDE_LENGTH - 1) / BLOCK_SIDE_LENGTH;

	dim3 numBlocks(blocksx, blocksy);

	//Start Kernel

	BayerGRBGRBilinearKernel<<<numBlocks, threadsPerBlock>>>(*source, *target);

}

baypatt1distorted.png
bayerpatt1normal.png

Hi,

I m facing a very strange problem here.

I am doing bayerpattern interpolation, for which I use a 2 dimensional one channel texture. This texture may have arbitrary size.

Everything works out fine, but when I use a width that is not dividible by 16, my whole image gets somehow distorted (skewed, pattern locations change). I think my texture is delivering wrong values, but I’m not sure.

Has anyone ever seen anything like this? I have been hunting this bug for two days now and I’m running out of ideas.

I have attached two results of a kernel run. The first one is 640x480 (baypatt1normal) and runs fine, the seconf one (baypatt1distorted) is 627x437 and shows very strange artifacts.

Regards,

Kwyjibo

#define BLOCK_SIDE_LENGTH	16

//Texture reference to work represent image data

texture<uchar1, 2, cudaReadModeElementType> bayTex;

//Enum represents the location in the bayer grid

enum bayerLocation

{

	RED             = 0,

	BLUE		= 1,

	GREENINBLUEROW	= 2,

	GREENINREDROW	= 3

};

//Returns the location 

__device__ inline bayerLocation getBayerLocation(unsigned int x, unsigned int y)

{

	if ((x % 2 == 0) && (y % 2) == 0) return GREENINREDROW;

	if ((x % 2 == 1) && (y % 2) == 0) return RED;

	if ((x % 2 == 0) && (y % 2) == 1) return BLUE;

					  return GREENINBLUEROW;

}

//Conversion Bayer pattern to BGR bilinear

__global__ void BayerGRBGRBilinearKernel(cudaImage8UDataDescriptor source, cudaImage8UDataDescriptor target)

{

	//Define some variables

	unsigned char* currentr;

	unsigned char* currentg;

	unsigned char* currentb;

	unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;

	unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

	currentb = (unsigned char*) ((char*) target.d_pointer + y * target.d_pitch) + (x * target.d_padding);

	currentg = (unsigned char*) ((char*) target.d_pointer + y * target.d_pitch) + (x * target.d_padding) + 1;

	currentr = (unsigned char*) ((char*) target.d_pointer + y * target.d_pitch) + (x * target.d_padding) + 2;

	if (x < source.d_width && y < source.d_height)

	{

		//Get bayer location

		bayerLocation bayloc;

		bayloc = getBayerLocation(x, y);

		//Switch bayer location to interpolate colors for the different cases

		switch (bayloc)

		{

			//Red location

			case RED:

				*currentr =  tex2D(bayTex, x  , y  ).x;

				*currentg = (tex2D(bayTex, x-1, y  ).x + \

							 tex2D(bayTex, x+1, y  ).x + \

							 tex2D(bayTex, x  , y+1).x + \

							 tex2D(bayTex, x  , y-1).x)  / 4;

				*currentb = (tex2D(bayTex, x-1, y-1).x + \

							 tex2D(bayTex, x+1, y+1).x + \

							 tex2D(bayTex, x-1, y+1).x + \

							 tex2D(bayTex, x+1, y-1).x)  / 4;

				break;

			//Blue location

			case BLUE:

				*currentr = (tex2D(bayTex, x-1, y-1).x + \

							 tex2D(bayTex, x+1, y+1).x + \

							 tex2D(bayTex, x-1, y+1).x + \

							 tex2D(bayTex, x+1, y-1).x)  / 4;

				*currentg = (tex2D(bayTex, x-1, y  ).x + \

							 tex2D(bayTex, x+1, y  ).x + \

							 tex2D(bayTex, x  , y+1).x + \

							 tex2D(bayTex, x  , y-1).x)  / 4;

				*currentb =  tex2D(bayTex, x  , y  ).x;

				break;

			//Green in blue row

			case GREENINBLUEROW:

				*currentr = (tex2D(bayTex, x  , y+1).x + \

							 tex2D(bayTex, x  , y-1).x)  / 2;

				*currentg =  tex2D(bayTex, x  , y  ).x;

				*currentb = (tex2D(bayTex, x+1, y  ).x + \

							 tex2D(bayTex, x-1, y  ).x)  / 2;

				break;

			//Green in red row

			case GREENINREDROW:

				*currentr = (tex2D(bayTex, x+1, y  ).x + \

							 tex2D(bayTex, x-1, y  ).x)  / 2;

				*currentg =  tex2D(bayTex, x  , y  ).x;

				*currentb = (tex2D(bayTex, x  , y+1).x + \

							 tex2D(bayTex, x  , y-1).x)  / 2;

				break;

		}

	}

}

//Wrapper function for Kernel

void BayerGRBGRBilinear(const cudaImage8UDataDescriptor* source, const cudaImage8UDataDescriptor* target)

{

	//Bind Texture to reference

	const textureReference* bayTexPtr;

	cudaGetTextureReference(&bayTexPtr, "bayTex");

	cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<uchar1>();

	//Set acces parameters

	bayTex.addressMode[0] = cudaAddressModeClamp;

	bayTex.addressMode[1] = cudaAddressModeClamp;

	//Bind pitched data to texture

	unsigned int width_px = source->d_width/source->d_padding;

	cudaBindTexture2D(NULL, bayTexPtr, source->d_pointer, &channelDesc, width_px, source->d_height, source->d_pitch);

	//Invoke the kernel

	dim3 threadsPerBlock(BLOCK_SIDE_LENGTH, BLOCK_SIDE_LENGTH);

	//Calculate number of blocks

	unsigned int blocksx = (width_px		+ BLOCK_SIDE_LENGTH - 1) / BLOCK_SIDE_LENGTH;

	unsigned int blocksy = (source->d_height	+ BLOCK_SIDE_LENGTH - 1) / BLOCK_SIDE_LENGTH;

	dim3 numBlocks(blocksx, blocksy);

	//Start Kernel

	BayerGRBGRBilinearKernel<<<numBlocks, threadsPerBlock>>>(*source, *target);

}

you have

    unsigned int blocksx = (width_px                + BLOCK_SIDE_LENGTH - 1) / BLOCK_SIDE_LENGTH;
    unsigned int blocksy = (source->d_height        + BLOCK_SIDE_LENGTH - 1) / BLOCK_SIDE_LENGTH;

what you want is

    unsigned int blocksx = width_px / BLOCK_SIDE_LENGTH  + (width%BLOCK_SIDE_LENGTH == 0?0:1);
    unsigned int blocksy = source->d_height / BLOCK_SIDE_LENGTH  + (source->d_height%BLOCK_SIDE_LENGTH == 0?0:1);

you have

    unsigned int blocksx = (width_px                + BLOCK_SIDE_LENGTH - 1) / BLOCK_SIDE_LENGTH;
    unsigned int blocksy = (source->d_height        + BLOCK_SIDE_LENGTH - 1) / BLOCK_SIDE_LENGTH;

what you want is

    unsigned int blocksx = width_px / BLOCK_SIDE_LENGTH  + (width%BLOCK_SIDE_LENGTH == 0?0:1);
    unsigned int blocksy = source->d_height / BLOCK_SIDE_LENGTH  + (source->d_height%BLOCK_SIDE_LENGTH == 0?0:1);