I’ve been having trouble getting CUDA to perform a correct bilinear interpolation on 2D textures of (signed or unsigned) short ints: interpolated values always seem to be rounded. However, interpolation on 8-bit integer data types seems to work fine and return unrounded values.
Test code (adapted from the top answer to this Stack Overflow question: http://stackoverflow.com/questions/17075617/setting-up-a-cuda-2d-unsigned-char-texture-for-linear-interpolation):
#include <stdlib.h>
#include <stdio.h>
/* Element type under test: toggle between the uchar and ushort pair below.
   TYPE_MAX rescales the cudaReadModeNormalizedFloat result (in [0,1])
   back to the integer range of TYPE. */
#define TYPE unsigned char
#define TYPE_MAX 255.f
//#define TYPE unsigned short
//#define TYPE_MAX 65535.f
/* 2D texture reference; reads return the texel value normalized to [0,1].
   NOTE(review): this is the legacy texture *reference* API, deprecated and
   removed in CUDA 12 — new code should use cudaTextureObject_t instead. */
texture<TYPE, 2, cudaReadModeNormalizedFloat> tex;
/* Serial debug kernel, intended for a <<<1,1>>> launch.
 * Samples the bound 2D texture at every texel center (+0.5f offset) plus an
 * optional sub-texel shift, rescales the normalized-float read back to the
 * integer range of TYPE, and prints the m x n grid one row per line.
 * (Device-side printf is for debugging only.) */
__global__ void kernel (int m, int n, float shift_x, float shift_y)
{
    for (int y = 0; y < m; y++) {
        for (int x = 0; x < n; x++) {
            float sample = tex2D (tex, x + 0.5f + shift_x, y + 0.5f + shift_y);
            printf ("%f ", TYPE_MAX * sample);
        }
        printf ("\n");
    }
}
/* Abort with a diagnostic if a CUDA runtime call fails. Kernel launches are
   asynchronous: launch-configuration errors are surfaced by cudaGetLastError()
   and in-kernel faults by the return value of cudaDeviceSynchronize(). */
#define CUDA_CHECK(call)                                                    \
    do {                                                                    \
        cudaError_t err_ = (call);                                          \
        if (err_ != cudaSuccess) {                                          \
            fprintf (stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                     cudaGetErrorString (err_));                            \
            exit (EXIT_FAILURE);                                            \
        }                                                                   \
    } while (0)

/* Uploads a 4x3 test pattern into a pitched 2D allocation, binds it to the
   texture reference with linear filtering and unnormalized coordinates, and
   prints the texture contents unshifted and shifted by half a texel in x and
   in y, so the interpolation behavior of TYPE can be inspected. */
int main (void)
{
    int m = 4; /* height = #rows    */
    int n = 3; /* width  = #columns */
    size_t pitch, tex_ofs;
    TYPE arr[4][3]= {{11,12,13},{21,22,23},{31,32,33},{251,252,253}};
    TYPE *arr_d = 0;

    /* Pitched allocation keeps each row aligned as required for 2D texturing;
       the returned pitch (>= n*sizeof(TYPE)) must be used for the copy too. */
    CUDA_CHECK(cudaMallocPitch((void**)&arr_d, &pitch, n*sizeof(*arr_d), m));
    CUDA_CHECK(cudaMemcpy2D(arr_d, pitch, arr, n*sizeof(arr[0][0]),
                            n*sizeof(arr[0][0]), m, cudaMemcpyHostToDevice));

    /* Unnormalized coordinates + linear filtering: tex2D(col+0.5, row+0.5)
       reads a texel center exactly; fractional shifts interpolate. */
    tex.normalized = false;
    tex.filterMode = cudaFilterModeLinear;
    CUDA_CHECK(cudaBindTexture2D (&tex_ofs, &tex, arr_d, &tex.channelDesc,
                                  n, m, pitch));
    /* A nonzero offset would mean the kernel's coordinates need adjusting;
       bail out rather than print misaligned samples. */
    if (tex_ofs != 0) {
        printf ("tex_ofs = %zu\n", tex_ofs);
        exit (1);
    }

    printf ("reading array straight\n");
    kernel<<<1,1>>>(m, n, 0.0f, 0.0f);
    CUDA_CHECK(cudaGetLastError());
    CUDA_CHECK(cudaDeviceSynchronize());

    printf ("reading array shifted in x-direction\n");
    kernel<<<1,1>>>(m, n, 0.5f, 0.0f);
    CUDA_CHECK(cudaGetLastError());
    CUDA_CHECK(cudaDeviceSynchronize());

    printf ("reading array shifted in y-direction\n");
    kernel<<<1,1>>>(m, n, 0.0f, 0.5f);
    CUDA_CHECK(cudaGetLastError());
    CUDA_CHECK(cudaDeviceSynchronize());

    /* Release the texture binding before freeing its backing storage. */
    CUDA_CHECK(cudaUnbindTexture (tex));
    CUDA_CHECK(cudaFree (arr_d));
    return 0;
}
Results with TYPE #define’d as unsigned char look correct: interpolating between 11 and 12, for example, correctly returns 11.5. With TYPE defined as unsigned short, however, the result appears to be rounded to the nearest integer (e.g., 11.0); a fractional value like 11.5 is never returned.
I don’t see any documentation indicating that interpolation behavior should differ between uchar and ushort textures. Is this a hardware limitation? A hidden rounding mode applied in the generated machine code? A bug?