2D Texture access How can I access pixels from 2D texture

Hello,

I have some problem with textures… namely, I cannot understand how do they work.

I’m loading some image from host to the texture like this:

/ allocate CUDA array in the device and copy image data

   cudaChannelFormatDesc channelDesc;

	  channelDesc = cudaCreateChannelDesc(32, 32,32, 32,cudaChannelFormatKindUnsigned );

cudaArray *cuArray;

	  cutilSafeCall( cudaMallocArray( &cuArray, &channelDesc, imgW, imgH ) );

	

cutilSafeCall( cudaMemcpyToArray( cuArray, 0, 0, img->getImgArray(), mem_size, cudaMemcpyHostToDevice ));

// set texture parameters

   texRef.addressMode[0] = cudaAddressModeClamp;

   texRef.addressMode[1] = cudaAddressModeClamp;

   texRef.filterMode = cudaFilterModePoint;

   texRef.normalized = false;

Then I wrote simple copy kernel, that copies pixels to some output array. The point is, I don’t know how can I access pixels in the texture . I used examples from NVIDIA but I still cannot understand how do the use it (everything through bit operations …and this doesn’t work, in my case, at all — why ?! and why do the shift it by 16, 24 … !!! - could somebody explain me how does texture and those bit operations work ? - I would be glad for every answer

This is my kernel :

texture<uchar4, 2, cudaReadModeElementType> texRef;

__global__ 

void copy_kernel ( IType *dst, int imgW, int imgH ) {

   const int idx = blockIdx.x*blockDim.x + threadIdx.x;

   const int idy = blockIdx.y*blockDim.y + threadIdx.y;

const float x = ( float )idx + 0.5f;

   const float y = ( float )idy + 0.5f;

if( idx < imgW && idy < imgH ) {

uchar4 rgba = tex2D( texRef, y, x );

	 dst[ (imgW*idx + idy) + 0 ] = ((uchar)(rgba.w ) << 24) | ((uchar)(rgba.z) << 16) | ((uchar)(rgba.y) << 8) | ((uchar)(rgba.x) << 0);

/*	 dst[ (imgW*idx + idy) + 1 ] = rgba.y;

	 dst[ (imgW*idx + idy) + 2 ] = rgba.z;

	 dst[ (imgW*idx + idy) + 3 ] = rgba.w;

*/

 }

}

This is what I’m getting … (see attch)
lena.png

That’s one stripey Lena!

It’s hard to tell without the complete code, but it looks like your channel description (32, 32, 32,3 2) doesn’t match the texture reference type (uchar4)?

The shifts are just building a 32-bit int containing the 4 8-bit rgba color components.

Thank you for your answer … I’m giving up now, whole day and I’m getting like this (!) see attch. Could somebody tell what I’m doing wrong in my code:

// data type of our images

typedef unsigned int IType;

void run_gpu( const int argc, const char **argv ) {

// source image on the host side

   uchar4 *src_img;

cudaInit( argc, argv );

IplImage *cv_img;

   const char *file_name = "lena.jpg";

if( (cv_img = cvLoadImage( file_name, CV_LOAD_IMAGE_UNCHANGED )) == NULL ) {

	   fprintf(stderr,  "Failed to load %s\n", file_name );

	 exit( EXIT_FAILURE );

   } 

loadImage( &src_img, cv_img );

unsigned int imgH = cv_img->height;

   unsigned int imgW = cv_img->width;

   short channels = cv_img->nChannels;

cudaArray *cuArray;

	  cutilSafeCall( cudaMallocArray( &cuArray, &uchar4tex, imgW, imgH ) );

	

cutilSafeCall( cudaMemcpyToArray( cuArray, 0, 0, src_img,channels*imgW*imgH*sizeof( uchar4) , cudaMemcpyHostToDevice ));

// set texture parameters

   texRef.addressMode[0] = cudaAddressModeClamp;

   texRef.addressMode[1] = cudaAddressModeClamp;

   texRef.filterMode = cudaFilterModePoint;

   texRef.normalized = false;

// bind the array to the texture

   cutilSafeCall( cudaBindTextureToArray( texRef, cuArray/*, channelDesc */) );

		   

// allocate device momory for results 

   IType *d_out;

	  cutilSafeCall( cudaMalloc( (void **)&d_out, channels*imgW*imgH*sizeof( IType )) );

// execute the kernel   

   dim3 threads( BLOCKDIM_X, BLOCKDIM_Y );

   dim3 grid_size( divUp( imgW, BLOCKDIM_X ), divUp( imgH, BLOCKDIM_Y ) );

// warmup

   copy_kernel<<< grid_size, threads >>>( d_out, imgW, imgH );

   cutilSafeCall( cudaThreadSynchronize() );

	  copy_kernel<<< grid_size, threads >>>( d_out, imgW, imgH );

// check if kernel generated an error 

	  cutilCheckMsg( "Kernel invocation failed" );

// just to ensure thar kernel has finished his job !

	  cudaThreadSynchronize();

// allocate host memory for results 

   IType *h_out;

   cutilSafeCall ( cudaMallocHost( (void **)&h_out, imgH*imgW*sizeof( IType )) );

// get results from the device 

   cutilSafeCall( cudaMemcpy( h_out, d_out, imgH*imgW*sizeof( IType /*uchar4*/ ), cudaMemcpyDeviceToHost ) );

// save array as image 

   IplImage *img_out = cvCreateImage( cvSize( imgW, imgH ), IPL_DEPTH_8U, 3 );

	  img_out->imageData = (char*)h_out;

	  cvNamedWindow( "GPU", CV_WINDOW_AUTOSIZE );

	  cvShowImage( "GPU", img_out );   

	  cvWaitKey();

   cvDestroyAllWindows();

   cvReleaseImage( &img_out );

cutilSafeCall(cudaFreeHost( h_out ));

   cutilSafeCall(cudaFree( d_out ));

   cutilSafeCall(cudaFreeArray( cuArray ));

cudaThreadExit();

}

Kernel :

/ declare texture reference for 2D uchar texture

texture<uchar4, 2, cudaReadModeElementType> texRef;

cudaChannelFormatDesc uchar4tex = cudaCreateChannelDesc(32,32,32,32,cudaChannelFormatKindUnsigned );

__device__ IType make_color(float r, float g, float  b, float a){

	return

		((int)(a/**255.0f*/ ) << 24) |

		((int)(b/**255.0f*/ ) << 16) |

		((int)(g/**255.0f*/ ) <<  8) |

		((int)(r/**255.0f*/) <<  0);

}

__global__ 

void copy_kernel ( IType *dst, int imgW, int imgH ) {

   const int idx = blockIdx.x*blockDim.x + threadIdx.x;

   const int idy = blockIdx.y*blockDim.y + threadIdx.y;

const float x = ( float )idx + 0.5f;

   const float y = ( float )idy + 0.5f;

	if( idx < imgW && idy < imgH ) {

	  uchar4 rgba = tex2D( texRef, x, y );

	   dst[ (imgW*idy + idx) + 0 ] = make_color( rgba.x, rgba.y, rgba.z, 0 );

	}

}

Could somebody help me I’m getting really frustrated with those textures…

One question more: I would like to use texture only to load image to the device, and then convolve image with some filters … Is it right way ? or should I work with global memory ?

In advance thanks a lot for all answers !

mpwm
lena2.png

Thank you for your answer … I’m giving up now, whole day and I’m getting like this (!) see attch. Could somebody tell what I’m doing wrong in my code:

// data type of our images

typedef unsigned int IType;

void run_gpu( const int argc, const char **argv ) {

// source image on the host side

   uchar4 *src_img;

cudaInit( argc, argv );

IplImage *cv_img;

   const char *file_name = "lena.jpg";

if( (cv_img = cvLoadImage( file_name, CV_LOAD_IMAGE_UNCHANGED )) == NULL ) {

	   fprintf(stderr,  "Failed to load %s\n", file_name );

	 exit( EXIT_FAILURE );

   } 

loadImage( &src_img, cv_img );

unsigned int imgH = cv_img->height;

   unsigned int imgW = cv_img->width;

   short channels = cv_img->nChannels;

cudaArray *cuArray;

	  cutilSafeCall( cudaMallocArray( &cuArray, &uchar4tex, imgW, imgH ) );

	

cutilSafeCall( cudaMemcpyToArray( cuArray, 0, 0, src_img,channels*imgW*imgH*sizeof( uchar4) , cudaMemcpyHostToDevice ));

// set texture parameters

   texRef.addressMode[0] = cudaAddressModeClamp;

   texRef.addressMode[1] = cudaAddressModeClamp;

   texRef.filterMode = cudaFilterModePoint;

   texRef.normalized = false;

// bind the array to the texture

   cutilSafeCall( cudaBindTextureToArray( texRef, cuArray/*, channelDesc */) );

		   

// allocate device momory for results 

   IType *d_out;

	  cutilSafeCall( cudaMalloc( (void **)&d_out, channels*imgW*imgH*sizeof( IType )) );

// execute the kernel   

   dim3 threads( BLOCKDIM_X, BLOCKDIM_Y );

   dim3 grid_size( divUp( imgW, BLOCKDIM_X ), divUp( imgH, BLOCKDIM_Y ) );

// warmup

   copy_kernel<<< grid_size, threads >>>( d_out, imgW, imgH );

   cutilSafeCall( cudaThreadSynchronize() );

	  copy_kernel<<< grid_size, threads >>>( d_out, imgW, imgH );

// check if kernel generated an error 

	  cutilCheckMsg( "Kernel invocation failed" );

// just to ensure thar kernel has finished his job !

	  cudaThreadSynchronize();

// allocate host memory for results 

   IType *h_out;

   cutilSafeCall ( cudaMallocHost( (void **)&h_out, imgH*imgW*sizeof( IType )) );

// get results from the device 

   cutilSafeCall( cudaMemcpy( h_out, d_out, imgH*imgW*sizeof( IType /*uchar4*/ ), cudaMemcpyDeviceToHost ) );

// save array as image 

   IplImage *img_out = cvCreateImage( cvSize( imgW, imgH ), IPL_DEPTH_8U, 3 );

	  img_out->imageData = (char*)h_out;

	  cvNamedWindow( "GPU", CV_WINDOW_AUTOSIZE );

	  cvShowImage( "GPU", img_out );   

	  cvWaitKey();

   cvDestroyAllWindows();

   cvReleaseImage( &img_out );

cutilSafeCall(cudaFreeHost( h_out ));

   cutilSafeCall(cudaFree( d_out ));

   cutilSafeCall(cudaFreeArray( cuArray ));

cudaThreadExit();

}

Kernel :

/ declare texture reference for 2D uchar texture

texture<uchar4, 2, cudaReadModeElementType> texRef;

cudaChannelFormatDesc uchar4tex = cudaCreateChannelDesc(32,32,32,32,cudaChannelFormatKindUnsigned );

__device__ IType make_color(float r, float g, float  b, float a){

	return

		((int)(a/**255.0f*/ ) << 24) |

		((int)(b/**255.0f*/ ) << 16) |

		((int)(g/**255.0f*/ ) <<  8) |

		((int)(r/**255.0f*/) <<  0);

}

__global__ 

void copy_kernel ( IType *dst, int imgW, int imgH ) {

   const int idx = blockIdx.x*blockDim.x + threadIdx.x;

   const int idy = blockIdx.y*blockDim.y + threadIdx.y;

const float x = ( float )idx + 0.5f;

   const float y = ( float )idy + 0.5f;

	if( idx < imgW && idy < imgH ) {

	  uchar4 rgba = tex2D( texRef, x, y );

	   dst[ (imgW*idy + idx) + 0 ] = make_color( rgba.x, rgba.y, rgba.z, 0 );

	}

}

Could somebody help me I’m getting really frustrated with those textures…

One question more: I would like to use texture only to load image to the device, and then convolve image with some filters … Is it right way ? or should I work with global memory ?

In advance thanks a lot for all answers !

mpwm

… I’ve made one step forward. Now I got ‘‘lena’’ without stripes, but only blue (!). What I did : I changed (added) computation of alpha channel to the function make_color. Now how can I get the right color-frame ? If I change x (0…3), I’ll get respectively blue, yellow and red colors of lena. But how can merge them to get right image ? Any idea ?! (I’m getting frustrated …@!$@!) Thank u very much for all answers.

This is my changed kernel:

// declare texture reference for 2D uchar texture

texture<uchar4, 2, cudaReadModeElementType> texRef;

cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32,32,32,32,cudaChannelFormatKindUnsigned );

__device__ unsigned int make_color(float r, float g, float  b, float a){

	return

		((int)(a/**255.0f*/ ) << 24) |

		((int)(b/**255.0f*/ ) << 16) |

		((int)(g/**255.0f*/ ) <<  8) |

		((int)(r/**255.0f*/) <<  0);

}

__global__ 

void copy_kernel ( unsigned int *dst, int imgW, int imgH ) {

   const int idx = blockIdx.x*blockDim.x + threadIdx.x;

   const int idy = blockIdx.y*blockDim.y + threadIdx.y;

const float x = ( float )idx + 0.5f;

   const float y = ( float )idy + 0.5f;

	if( idx < imgW && idy < imgH ) {

	  uchar4 rgba = tex2D( texRef, x, y );

	  dst[ (imgW*idy + idx)+ X ] = make_color( rgba.x, rgba.y, rgba.z, rgba.w );

	}

}

… I’ve made one step forward. Now I got ‘‘lena’’ without stripes, but only blue (!). What I did : I changed (added) computation of alpha channel to the function make_color. Now how can I get the right color-frame ? If I change x (0…3), I’ll get respectively blue, yellow and red colors of lena. But how can merge them to get right image ? Any idea ?! (I’m getting frustrated …@!$@!) Thank u very much for all answers.

This is my changed kernel:

// declare texture reference for 2D uchar texture

texture<uchar4, 2, cudaReadModeElementType> texRef;

cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32,32,32,32,cudaChannelFormatKindUnsigned );

__device__ unsigned int make_color(float r, float g, float  b, float a){

	return

		((int)(a/**255.0f*/ ) << 24) |

		((int)(b/**255.0f*/ ) << 16) |

		((int)(g/**255.0f*/ ) <<  8) |

		((int)(r/**255.0f*/) <<  0);

}

__global__ 

void copy_kernel ( unsigned int *dst, int imgW, int imgH ) {

   const int idx = blockIdx.x*blockDim.x + threadIdx.x;

   const int idy = blockIdx.y*blockDim.y + threadIdx.y;

const float x = ( float )idx + 0.5f;

   const float y = ( float )idy + 0.5f;

	if( idx < imgW && idy < imgH ) {

	  uchar4 rgba = tex2D( texRef, x, y );

	  dst[ (imgW*idy + idx)+ X ] = make_color( rgba.x, rgba.y, rgba.z, rgba.w );

	}

}

does really nobody know how textures work … Is it unusually to use textures ? Is it right way to use them for convolution (actually my aim is to implement Lucas and Kanade -> Which memory type should I use ?

does really nobody know how textures work … Is it unusually to use textures ? Is it right way to use them for convolution (actually my aim is to implement Lucas and Kanade -> Which memory type should I use ?

It’s hard to tell from what you are doing, but textures are really easy to work with. You just need to make sure that you define them correctly in terms of type.

by the way, why make_color and not make_uchar4 and why define the image as int and not uchar4, and finally does your image really have an alpha channel? (is it rgb, rgba, bgr or bgra). Are you using the same data type for the texture and the image?

It’s hard to tell from what you are doing, but textures are really easy to work with. You just need to make sure that you define them correctly in terms of type.

by the way, why make_color and not make_uchar4 and why define the image as int and not uchar4, and finally does your image really have an alpha channel? (is it rgb, rgba, bgr or bgra). Are you using the same data type for the texture and the image?

thank you for your answer, so my picture doesn’t have alpha channel and the image should be rgb; (as you advised me I changed every variables to uchar)

what I changed now is:

texture<uchar, 2, cudaReadModeElementType> texRef;

cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(8,8,8,8,cudaChannelFormatKindUnsigned );

and in the kernel:

if( idx < imgW && idy < imgH ) {

	  uchar rgba = tex2D( texRef, x, y );

	  dst[ (imgW*idy + idx) ] = rgba;

  }

but what I’m getting is … see attach.

How can I compute now right colors and why the image wasn’t complete depicted.

PS. Working with texture is not soooo easy … :) i’m working on it almost 5 days …:/ Please help …
lena3.jpg

thank you for your answer, so my picture doesn’t have alpha channel and the image should be rgb; (as you advised me I changed every variables to uchar)

what I changed now is:

texture<uchar, 2, cudaReadModeElementType> texRef;

cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(8,8,8,8,cudaChannelFormatKindUnsigned );

and in the kernel:

if( idx < imgW && idy < imgH ) {

	  uchar rgba = tex2D( texRef, x, y );

	  dst[ (imgW*idy + idx) ] = rgba;

  }

but what I’m getting is … see attach.

How can I compute now right colors and why the image wasn’t complete depicted.

PS. Working with texture is not soooo easy … :) i’m working on it almost 5 days …:/ Please help …

[quote name=‘mpwm’ post=‘1060863’ date=‘May 24 2010, 04:07 PM’]

thank you for your answer, so my picture doesn’t have alpha channel and the image should be rgb; (as you advised me I changed every variables to uchar)

what I changed now is:

[codebox]texture texRef;

{

uchar *in;

size_t stride;

cudaMallocPitch(&in, &stride, …);

cudaMemcpy2D(...);

cudaChannelFormatDesc desc = cudaCreateChannelDesc ();

// Assuming RGB image and width in pixels, we need width*3 as this is width*rgb

cudaBindTexture2D(NULL, &texRef, in, &desc, width*3, height, stride);

// Each kernel copies three pixels

dim3 dimGrid((height - 1)/16 + 1, (width - 1)/16 + 1);

dim3 dimBlock(16, 16);

KernelTexture <<< dimGrid, dimBlock >>> (out, outStride, height, width);

[/codebox]

In the kernel for a gray scale image you want

[codebox]global_ void KernelTexture(char *out, size_t outSride, int width, int height)

{

usigned int x = umul24(blockDim.x, blockIdx.x) + threadIdx.x;

usigned int y = umul24(blockDim.y, blockIdx.y) + threadIdx.y;

if (x > width || y > height)

return;

out[y*outSride + x] = tex2D(texRef, x, y);

}

[/codebox]

For an rgb image you probably want

[codebox]global_ void KernelTexture(char *out, size_t outSride, int width, int height)

{

usigned int x = (umul24(blockDim.x, blockIdx.x) + threadIdx.x)*3;

usigned int y = umul24(blockDim.y, blockIdx.y) + threadIdx.y;

// width is in pixels, not bytes

if (x > width*3 || y > height)

return;

out[y*outSride + x ] = tex2D(texRef, x , y);

out[y*outSride + x + 1] = tex2D(texRef, x + 1, y);

out[y*outSride + x + 2] = tex2D(texRef, x + 2, y);

}

[/codebox]

[quote name=‘mpwm’ post=‘1060863’ date=‘May 24 2010, 04:07 PM’]

thank you for your answer, so my picture doesn’t have alpha channel and the image should be rgb; (as you advised me I changed every variables to uchar)

what I changed now is:

[codebox]texture texRef;

{

uchar *in;

size_t stride;

cudaMallocPitch(&in, &stride, …);

cudaMemcpy2D(...);

cudaChannelFormatDesc desc = cudaCreateChannelDesc ();

// Assuming RGB image and width in pixels, we need width*3 as this is width*rgb

cudaBindTexture2D(NULL, &texRef, in, &desc, width*3, height, stride);

// Each kernel copies three pixels

dim3 dimGrid((height - 1)/16 + 1, (width - 1)/16 + 1);

dim3 dimBlock(16, 16);

KernelTexture <<< dimGrid, dimBlock >>> (out, outStride, height, width);

[/codebox]

In the kernel for a gray scale image you want

[codebox]global_ void KernelTexture(char *out, size_t outSride, int width, int height)

{

usigned int x = umul24(blockDim.x, blockIdx.x) + threadIdx.x;

usigned int y = umul24(blockDim.y, blockIdx.y) + threadIdx.y;

if (x > width || y > height)

return;

out[y*outSride + x] = tex2D(texRef, x, y);

}

[/codebox]

For an rgb image you probably want

[codebox]global_ void KernelTexture(char *out, size_t outSride, int width, int height)

{

usigned int x = (umul24(blockDim.x, blockIdx.x) + threadIdx.x)*3;

usigned int y = umul24(blockDim.y, blockIdx.y) + threadIdx.y;

// width is in pixels, not bytes

if (x > width*3 || y > height)

return;

out[y*outSride + x ] = tex2D(texRef, x , y);

out[y*outSride + x + 1] = tex2D(texRef, x + 1, y);

out[y*outSride + x + 2] = tex2D(texRef, x + 2, y);

}

[/codebox]

As long as it is not a strippy lena, its ok for the forums ;-)

As long as it is not a strippy lena, its ok for the forums ;-)

If you are using 2D arrays in CPU, be warned that dimensions get swapped when you move to Textures…

A[y] will need to accessed as “tex2D(y,x)”. I had this problem with 3D textures… and I had no image processing to see those beautiful striped, bluish lenas…

http://forums.nvidia.com/index.php?showtop…mp;#entry972023

If you are using 2D arrays in CPU, be warned that dimensions get swapped when you move to Textures…

A[y] will need to accessed as “tex2D(y,x)”. I had this problem with 3D textures… and I had no image processing to see those beautiful striped, bluish lenas…

http://forums.nvidia.com/index.php?showtop…mp;#entry972023