Below is a worked example of bilinear interpolation in a 2D texture, where texels are of an integer type. One thing to keep in mind is that the hardware interpolation has very low resolution, using a 1.8 fixed-point format internally (See section F.2. Linear Filtering of the CUDA C Programming Guide). So the granularity of the interpolation is going to be rather coarse and depending on your use case you may be better off doing your own interpolation. The output of the code below should look like this:
reading array straight
11.00000 12.00000 13.00000
21.00000 22.00000 23.00000
31.00000 32.00000 33.00000
253.00000 254.00000 255.00000
reading array shifted in x-direction
12.00000 13.00000 13.00000
22.00000 23.00000 23.00000
32.00000 33.00000 33.00000
254.00000 255.00000 255.00000
reading array shifted in y-direction
11.00000 12.00000 13.00000
16.00000 17.00000 18.00000
26.00000 27.00000 28.00000
142.00000 143.00000 144.00000
Here is the source code:
#include <stdlib.h>
#include <stdio.h>
// Macro to catch CUDA errors in CUDA runtime calls
#define CUDA_SAFE_CALL(call) \
do { \
cudaError_t err = call; \
if (cudaSuccess != err) { \
fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
__FILE__, __LINE__, cudaGetErrorString(err) ); \
exit(EXIT_FAILURE); \
} \
} while (0)
// Macro to catch CUDA errors in kernel launches
#define CHECK_LAUNCH_ERROR() \
do { \
/* Check synchronous errors, i.e. pre-launch */ \
cudaError_t err = cudaGetLastError(); \
if (cudaSuccess != err) { \
fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
__FILE__, __LINE__, cudaGetErrorString(err) ); \
exit(EXIT_FAILURE); \
} \
/* Check asynchronous errors, i.e. kernel failed (ULF) */ \
err = cudaThreadSynchronize(); \
if (cudaSuccess != err) { \
fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
__FILE__, __LINE__, cudaGetErrorString( err) ); \
exit(EXIT_FAILURE); \
} \
} while (0)
texture<unsigned short, 2, cudaReadModeNormalizedFloat> tex;
__global__ void kernel (int m, int n, float shift_x, float shift_y)
{
float val;
for (int row = 0; row < m; row++) {
for (int col = 0; col < n; col++) {
val = 65535.0 * tex2D (tex, col+0.5f+shift_x, row+0.5f+shift_y);
printf ("%12.5f ", val);
}
printf ("\n");
}
}
int main (void)
{
int m = 4; // height = #rows
int n = 3; // width = #columns
size_t pitch, tex_ofs;
unsigned short arr[4][3]= {{11,12,13},{21,22,23},{31,32,33},{253,254,255}};
unsigned short *arr_d = 0;
CUDA_SAFE_CALL(cudaMallocPitch((void**)&arr_d,&pitch,n*sizeof(*arr_d),m));
CUDA_SAFE_CALL(cudaMemcpy2D(arr_d, pitch, arr, n*sizeof(arr[0][0]),
n*sizeof(arr[0][0]),m,cudaMemcpyHostToDevice));
tex.normalized = false;
tex.filterMode = cudaFilterModeLinear;
CUDA_SAFE_CALL (cudaBindTexture2D (&tex_ofs, &tex, arr_d, &tex.channelDesc,
n, m, pitch));
if (tex_ofs !=0) {
printf ("tex_ofs = %zu\n", tex_ofs);
return EXIT_FAILURE;
}
printf ("reading array straight\n");
kernel<<<1,1>>>(m, n, 0.0f, 0.0f);
CHECK_LAUNCH_ERROR();
CUDA_SAFE_CALL (cudaDeviceSynchronize());
printf ("reading array shifted in x-direction\n");
kernel<<<1,1>>>(m, n, 1.0f, 0.0f);
CHECK_LAUNCH_ERROR();
CUDA_SAFE_CALL (cudaDeviceSynchronize());
printf ("reading array shifted in y-direction\n");
kernel<<<1,1>>>(m, n, 0.0, -0.5f);
CUDA_SAFE_CALL (cudaDeviceSynchronize());
CUDA_SAFE_CALL (cudaFree (arr_d));
return EXIT_SUCCESS;
}