Linear interpolation with integer texture.

“Linear texture filtering may be done only for textures that are configured to return floating-point data.”

This statement is unclear. It can be interpreted in two ways.

  1. It’s fine if the texture memory is integer, so long as you access it using cudaReadModeNormalizedFloat.

  2. Everything, including the texture memory, must be float.

So which is it?

I assumed that 1) was the correct interpretation. I get valid reads for positions that exactly match a texel position, and random garbage otherwise (the values fluctuate when I read in two different kernels). I assume this happens because the hardware actually can’t do bilinear interpolation for integer data. Is that correct?

Thanks.

Below is a worked example of bilinear interpolation in a 2D texture, where texels are of an integer type. One thing to keep in mind is that the hardware interpolation has very low resolution, using a 1.8 fixed-point format internally (See section F.2. Linear Filtering of the CUDA C Programming Guide). So the granularity of the interpolation is going to be rather coarse and depending on your use case you may be better off doing your own interpolation. The output of the code below should look like this:

reading array straight
    11.00000      12.00000      13.00000
    21.00000      22.00000      23.00000
    31.00000      32.00000      33.00000
   253.00000     254.00000     255.00000
reading array shifted in x-direction
    12.00000      13.00000      13.00000
    22.00000      23.00000      23.00000
    32.00000      33.00000      33.00000
   254.00000     255.00000     255.00000
reading array shifted in y-direction
    11.00000      12.00000      13.00000
    16.00000      17.00000      18.00000
    26.00000      27.00000      28.00000
   142.00000     143.00000     144.00000

Here is the source code:

#include <stdlib.h>
#include <stdio.h>

// Macro to catch CUDA errors in CUDA runtime calls
#define CUDA_SAFE_CALL(call)                                          \
do {                                                                  \
    cudaError_t err = call;                                           \
    if (cudaSuccess != err) {                                         \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                 __FILE__, __LINE__, cudaGetErrorString(err) );       \
        exit(EXIT_FAILURE);                                           \
    }                                                                 \
} while (0)
// Macro to catch CUDA errors in kernel launches
#define CHECK_LAUNCH_ERROR()                                          \
do {                                                                  \
    /* Check synchronous errors, i.e. pre-launch */                   \
    cudaError_t err = cudaGetLastError();                             \
    if (cudaSuccess != err) {                                         \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                 __FILE__, __LINE__, cudaGetErrorString(err) );       \
        exit(EXIT_FAILURE);                                           \
    }                                                                 \
    /* Check asynchronous errors, i.e. kernel failed (ULF) */         \
    err = cudaThreadSynchronize();                                    \
    if (cudaSuccess != err) {                                         \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                 __FILE__, __LINE__, cudaGetErrorString( err) );      \
        exit(EXIT_FAILURE);                                           \
    }                                                                 \
} while (0)

texture<unsigned short, 2, cudaReadModeNormalizedFloat> tex;

__global__ void kernel (int m, int n, float shift_x, float shift_y) 
{
    float val;
    for (int row = 0; row < m; row++) {
        for (int col = 0; col < n; col++) {
            val = 65535.0 * tex2D (tex, col+0.5f+shift_x, row+0.5f+shift_y);
            printf ("%12.5f  ", val);
        }
        printf ("\n");
    }
}

int main (void)
{
    int m = 4; // height = #rows
    int n = 3; // width  = #columns
    size_t pitch, tex_ofs;
    unsigned short arr[4][3]= {{11,12,13},{21,22,23},{31,32,33},{253,254,255}};
    unsigned short *arr_d = 0;

    CUDA_SAFE_CALL(cudaMallocPitch((void**)&arr_d,&pitch,n*sizeof(*arr_d),m));
    CUDA_SAFE_CALL(cudaMemcpy2D(arr_d, pitch, arr, n*sizeof(arr[0][0]),
                                n*sizeof(arr[0][0]),m,cudaMemcpyHostToDevice));
    tex.normalized = false;
    tex.filterMode = cudaFilterModeLinear;
    CUDA_SAFE_CALL (cudaBindTexture2D (&tex_ofs, &tex, arr_d, &tex.channelDesc,
                                       n, m, pitch));
    if (tex_ofs !=0) {
        printf ("tex_ofs = %zu\n", tex_ofs);
        return EXIT_FAILURE;
    }
    printf ("reading array straight\n");
    kernel<<<1,1>>>(m, n, 0.0f, 0.0f);
    CHECK_LAUNCH_ERROR();
    CUDA_SAFE_CALL (cudaDeviceSynchronize());
    printf ("reading array shifted in x-direction\n");
    kernel<<<1,1>>>(m, n, 1.0f, 0.0f);
    CHECK_LAUNCH_ERROR();
    CUDA_SAFE_CALL (cudaDeviceSynchronize());
    printf ("reading array shifted in y-direction\n");
    kernel<<<1,1>>>(m, n, 0.0, -0.5f);
    CUDA_SAFE_CALL (cudaDeviceSynchronize());
    CUDA_SAFE_CALL (cudaFree (arr_d));
    return EXIT_SUCCESS;
}

Thanks for the example. The error was a bad case of PEBKAC.

#if PIX_TYPE == 1
float mult = 255.0f;
#else
float mult = 65535.0f;
#endif

That should have been PIX_SIZE. Multiplying by 65535.0 instead of 255.0 rollovers to the correct value, unless there was an interpolation going on, in which case it yields garbage.