CUDA Texture Memory Example for Beginners

Hello everyone!

For my school project, I need to do interpolation using texture memory. However, I know very little about texture memory and cannot use it. Could you show very very basic code that uses texture memory or direct me to it?

Best,

1 Like

CUDA ships with a whole bunch of example applications, including some that demonstrate the use of textures. I would suggest having a look at those. Below is a brief example code I cooked up myself. The output should look like so:

reading array straight
    10.00000      20.00000      30.00000
    40.00000      50.00000      60.00000
    70.00000      80.00000      90.00000
   100.00000     110.00000     120.00000
reading array shifted 0.5 in x-direction
    15.00000      25.00000      30.00000
    45.00000      55.00000      60.00000
    75.00000      85.00000      90.00000
   105.00000     115.00000     120.00000
reading array shifted 0.5 in y-direction
    10.00000      20.00000      30.00000
    25.00000      35.00000      45.00000
    55.00000      65.00000      75.00000
    85.00000      95.00000     105.00000

#include <stdlib.h>
#include <stdio.h>

// Macro to catch CUDA errors in CUDA runtime calls
#define CUDA_SAFE_CALL(call)                                          \
do {                                                                  \
    cudaError_t err = call;                                           \
    if (cudaSuccess != err) {                                         \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                 __FILE__, __LINE__, cudaGetErrorString(err) );       \
        exit(EXIT_FAILURE);                                           \
    }                                                                 \
} while (0)
// Macro to catch CUDA errors in kernel launches
#define CHECK_LAUNCH_ERROR()                                          \
do {                                                                  \
    /* Check synchronous errors, i.e. pre-launch */                   \
    cudaError_t err = cudaGetLastError();                             \
    if (cudaSuccess != err) {                                         \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                 __FILE__, __LINE__, cudaGetErrorString(err) );       \
        exit(EXIT_FAILURE);                                           \
    }                                                                 \
    /* Check asynchronous errors, i.e. kernel failed (ULF) */         \
    err = cudaThreadSynchronize();                                    \
    if (cudaSuccess != err) {                                         \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                 __FILE__, __LINE__, cudaGetErrorString( err) );      \
        exit(EXIT_FAILURE);                                           \
    }                                                                 \
} while (0)

texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex;

__global__ void kernel (int m, int n, float shift_x, float shift_y) 
{
    float val;
    for (int row = 0; row < m; row++) {
        for (int col = 0; col < n; col++) {
            val = 255.0 * tex2D (tex, col+0.5f+shift_x, row+0.5f+shift_y);
            printf ("%12.5f  ", val);
        }
        printf ("\n");
    }
}

int main (void)
{
    int m = 4; // height = #rows
    int n = 3; // width  = #columns
    size_t pitch, tex_ofs;
    unsigned char arr[4][3]= {{10,20,30},{40,50,60},{70,80,90},{100,110,120}};
    unsigned char *arr_d = 0;

    CUDA_SAFE_CALL(cudaMallocPitch((void**)&arr_d,&pitch,n*sizeof(*arr_d),m));
    CUDA_SAFE_CALL(cudaMemcpy2D(arr_d, pitch, arr, n*sizeof(arr[0][0]),
                                n*sizeof(arr[0][0]),m,cudaMemcpyHostToDevice));
    tex.normalized = false;
    tex.filterMode = cudaFilterModeLinear;
    CUDA_SAFE_CALL (cudaBindTexture2D (&tex_ofs, &tex, arr_d, &tex.channelDesc,
                                       n, m, pitch));
    if (tex_ofs !=0) {
        printf ("tex_ofs = %zu\n", tex_ofs);
        return EXIT_FAILURE;
    }
    printf ("reading array straight\n");
    kernel<<<1,1>>>(m, n, 0.0f, 0.0f);
    CHECK_LAUNCH_ERROR();
    CUDA_SAFE_CALL (cudaDeviceSynchronize());
    printf ("reading array shifted 0.5 in x-direction\n");
    kernel<<<1,1>>>(m, n, 0.5f, 0.0f);
    CHECK_LAUNCH_ERROR();
    CUDA_SAFE_CALL (cudaDeviceSynchronize());
    printf ("reading array shifted 0.5 in y-direction\n");
    kernel<<<1,1>>>(m, n, 0.0, -0.5f);
    CUDA_SAFE_CALL (cudaDeviceSynchronize());
    CUDA_SAFE_CALL (cudaFree (arr_d));
    return EXIT_SUCCESS;
}
1 Like

Canā€™t thank you enough for your help!

Nevertheless, I have some questions for you. If you would answer them I would appreciated.
1 - Your code does not have <cuda.h> or <cuda_runtime.h> libraries but works. How is that possible I could not understand.
2 - 3rd and 4th inputs to kernel function are written as 0.0f or 0.5f. What is the meaning of ā€œfā€ used here?
3 - In the kernel, val variable is defined as 255*tex2D(ā€¦). Why 255?
4 - why tex.normalized is false?
5 - Should not we unbind the texture after we are done with it?

Thank you a lot for your incredible help, it made me understand texture memory better.

Best,

When you compile a file with a .cu extension, nvcc automatically pulls in CUDA-specific header files.
f is a suffix for floating-point literal constants that makes them have type float. This is a C/C++ thing.
Yes, it would arguably be cleaner to unbind the texture, but since the apps exits anyhow there is really no need here.
tex.normalized = false because the program does not index into the texture with normalized coordinates (normalized means: in [0,1]).
Multiply with 255 because [0,1] is mapped to [0,255]

1 Like

@njuffa

I am using CUDA 12.2. While trying to compile I found issues with texture.


For the same code which you wrote above:

$ nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Jun_13_19:16:58_PDT_2023
Cuda compilation tools, release 12.2, V12.2.91
Build cuda_12.2.r12.2/compiler.32965470_0
$ nvcc tex.cu -o tex
tex.cu(35): error: texture is not a template
  texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex;
  ^

tex.cu(42): error: no instance of overloaded function "tex2D" matches the argument list
            argument types are: (<error-type>, float, float)
              val = 255.0 * tex2D (tex, col+0.5f+shift_x, row+0.5f+shift_y);
                            ^

tex.cu(62): error: identifier "cudaBindTexture2D" is undefined
      do { cudaError_t err = cudaBindTexture2D (&tex_ofs, &tex, arr_d, &tex.channelDesc, n, m, pitch); if (cudaSuccess != err) { fprintf (
                             ^

3 errors detected in the compilation of "tex.cu".

I found it no longer compiles on the latest CUDA.

I tried working the same out on an old setup:

$ nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Tue_May__3_18:49:52_PDT_2022
Cuda compilation tools, release 11.7, V11.7.64
Build cuda_11.7.r11.7/compiler.31294372_0
$ nvcc tex.cu -o tex                                                              
tex.cu(40): warning #1215-D: function "tex2D(texture<T, 2, cudaReadModeNormalizedFloat>, float, float) [with T=unsigned char]"
/usr/local/cuda/bin/../targets/x86_64-linux/include/texture_fetch_functions.h(209): here was declared deprecated               

tex.cu: In function ā€˜int main()ā€™:
tex.cu:60:96: warning: ā€˜cudaError_t cudaBindTexture2D(size_t*, const textureReference*, const void*, const cudaChannelFormatDesc*, size_t, size_t, size_t)ā€™ is deprecated [-Wdeprecated-declarations]
   60 |     CUDA_SAFE_CALL (cudaBindTexture2D (&tex_ofs, &tex, arr_d, &tex.channelDesc,
      |                                                                                                ^
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime_api.h:8749:46: note: declared here
 8749 | extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaBindTexture2D(size_t *offset, const struct textureReference *texref, const void *devPtr, const struct cudaChannelFormatDesc *desc, size_t width, size_t height, size_t pitch);
      |                                              ^~~~~~~~~~~~~~~~~
tex.cu:60:96: warning: ā€˜cudaError_t cudaBindTexture2D(size_t*, const textureReference*, const void*, const cudaChannelFormatDesc*, size_t, size_t, size_t)ā€™ is deprecated [-Wdeprecated-declarations]
   60 |     CUDA_SAFE_CALL (cudaBindTexture2D (&tex_ofs, &tex, arr_d, &tex.channelDesc,
      |                                                                                                ^
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime_api.h:8749:46: note: declared here
 8749 | extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaBindTexture2D(size_t *offset, const struct textureReference *texref, const void *devPtr, const struct cudaChannelFormatDesc *desc, size_t width, size_t height, size_t pitch);
      |                                              ^~~~~~~~~~~~~~~~~
tex.cu:68:32: warning: ā€˜cudaError_t cudaThreadSynchronize()ā€™ is deprecated [-Wdeprecated-declarations]
   68 |     CHECK_LAUNCH_ERROR();
      |                                ^
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime_api.h:1057:46: note: declared here
 1057 | extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaThreadSynchronize(void);
      |                                              ^~~~~~~~~~~~~~~~~~~~~
tex.cu:68:32: warning: ā€˜cudaError_t cudaThreadSynchronize()ā€™ is deprecated [-Wdeprecated-declarations]
   68 |     CHECK_LAUNCH_ERROR();
      |                                ^
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime_api.h:1057:46: note: declared here
 1057 | extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaThreadSynchronize(void);
      |                                              ^~~~~~~~~~~~~~~~~~~~~
tex.cu:72:32: warning: ā€˜cudaError_t cudaThreadSynchronize()ā€™ is deprecated [-Wdeprecated-declarations]
   72 |     CHECK_LAUNCH_ERROR();
      |                                ^
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime_api.h:1057:46: note: declared here
 1057 | extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaThreadSynchronize(void);
      |                                              ^~~~~~~~~~~~~~~~~~~~~
tex.cu:72:32: warning: ā€˜cudaError_t cudaThreadSynchronize()ā€™ is deprecated [-Wdeprecated-declarations]
   72 |     CHECK_LAUNCH_ERROR();
      |                                ^
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime_api.h:1057:46: note: declared here
 1057 | extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaThreadSynchronize(void);

In the CUDA 11.7 version, nvcc warns about the deprecation. But it still compiles the code.

I learned that the texture reference is deprecated and one should use texture objects instead. Is it the case that the required definitions are no longer available in the latest CUDA 12.2?

NVIDIA removed support for texture references in CUDA 12.0. NVIDIA told CUDA programmers that they should switch to texture objects (as they planned to remove texture references) for five years prior to that.

2 Likes

see here and here and here.