Very simple CUDA texture question...

I am starting to learn to use CUDA textures for interpolation. However, I keep getting incorrect results. I know the mistake is probably obvious and stupid, but I just can’t spot it.

It is a very simple 1D linear interpolation, but I keep getting undesirable output: tex1D(texRef, x) does not interpolate over the full range of x from 0 to 1, only from 0.125 to 0.875. For x below 0.125 or above 0.875 the output is clamped.

here is the source code

[codebox]

#include<stdio.h>

// File-scope 1D texture reference over float texels. With
// cudaReadModeElementType the fetch returns the stored float without any
// conversion. The host code below sets the addressing/filter modes and binds
// a CUDA array to this reference before the kernel is launched.

texture<float,1,cudaReadModeElementType> texRef;

// Alternative read mode, left disabled:
//texture<float,1,cudaReadModeNormalizedFloat> texRef;

/*
 * texTest: every thread samples texRef at one coordinate and records both the
 * coordinate and the fetched value.
 *
 * Launch layout: one 1D block; only threadIdx.x is used for indexing, so
 * additional blocks would simply repeat the same work. Threads with
 * threadIdx.x >= n do nothing.
 *
 *   n          number of sample points; thread tid samples at x = tid / n
 *   d_odata    device buffer of at least n floats; receives tex1D(texRef, x)
 *   d_refdata  device buffer of at least n floats; receives x
 *
 * NOTE on the sampled coordinate: the host code in this file enables
 * normalized coordinates and linear filtering. Texel values sit at texel
 * centres, so for a texture of N texels the hardware interpolates around
 * xB = N*x - 0.5. With the 4-texel array bound by the host code this means
 * the first texel is returned for x <= 0.125 and the last for x >= 0.875
 * (clamped outside). To make x in [0,1] span T[0]..T[N-1] instead, sample at
 * (x * (N - 1) + 0.5f) / N. The plain x is kept here on purpose because it is
 * the behaviour being discussed.
 */
__global__ static void texTest(int n, float* d_odata, float* d_refdata)
{
    const int tid = threadIdx.x;

    // Guard against a block launched with more threads than sample points.
    if (tid >= n) {
        return;
    }

    const float x = (float)tid / (float)n;

    d_refdata[tid] = x;
    d_odata[tid] = tex1D(texRef, x);
}

/* Prints a diagnostic for a failed CUDA runtime call; returns true on success. */
static bool texture1dCheck(cudaError_t status, const char* what)
{
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "TEXTURE1D: %s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

/*
 * TEXTURE1D: uploads the four texels {0, 1, 2, 3} into a CUDA array, binds it
 * to texRef with clamp addressing, linear filtering and normalized
 * coordinates, runs texTest with one block of n = 32 threads, and writes the
 * (coordinate, fetched value) pairs to stdout and to "result.out".
 *
 * argc and argv are not used.
 *
 * Every CUDA call is checked; on the first failure a message is printed to
 * stderr, the remaining steps are skipped, and whatever was allocated is
 * released.
 */
extern "C" void TEXTURE1D(int argc, char* argv)
{
    (void)argc;
    (void)argv;

    const int n = 32;        // number of sample points (= threads in the block)
    const int texWidth = 4;  // number of texels in the texture
    const size_t sampleBytes = sizeof(float) * n;

    // Texel values; float literals avoid double-to-float conversions.
    const float h_data[texWidth] = {0.0f, 1.0f, 2.0f, 3.0f};

    float* h_odata = NULL;
    float* h_refdata = NULL;
    float* d_odata = NULL;
    float* d_refdata = NULL;
    cudaArray* cuArray = NULL;

    // Channel description for a single 32-bit float component.
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();

    // Allocate the device output buffers and the texture array, then upload
    // the texels. && short-circuits, so nothing runs after the first failure.
    bool ok =
        texture1dCheck(cudaMalloc((void**)&d_odata, sampleBytes), "cudaMalloc(d_odata)") &&
        texture1dCheck(cudaMalloc((void**)&d_refdata, sampleBytes), "cudaMalloc(d_refdata)") &&
        texture1dCheck(cudaMallocArray(&cuArray, &channelDesc, texWidth, 1), "cudaMallocArray") &&
        texture1dCheck(cudaMemcpyToArray(cuArray, 0, 0, h_data, texWidth * sizeof(float),
                                         cudaMemcpyHostToDevice),
                       "cudaMemcpyToArray");

    if (ok) {
        // Texture parameters must be set before binding.
        texRef.addressMode[0] = cudaAddressModeClamp;
        texRef.filterMode = cudaFilterModeLinear;
        texRef.normalized = true;

        ok = texture1dCheck(cudaBindTextureToArray(texRef, cuArray, channelDesc),
                            "cudaBindTextureToArray");
    }

    if (ok) {
        dim3 blockSize(n, 1, 1);
        dim3 gridSize(1, 1, 1);

        texTest<<<gridSize, blockSize, 0>>>(n, d_odata, d_refdata);

        // A launch reports configuration errors only through cudaGetLastError();
        // execution errors surface at the following synchronization.
        ok = texture1dCheck(cudaGetLastError(), "texTest launch") &&
             texture1dCheck(cudaThreadSynchronize(), "texTest execution");
    }

    if (ok) {
        h_odata = (float*)malloc(sampleBytes);
        h_refdata = (float*)malloc(sampleBytes);
        if (h_odata == NULL || h_refdata == NULL) {
            fprintf(stderr, "TEXTURE1D: host allocation failed\n");
            ok = false;
        }
    }

    if (ok) {
        // cudaMemcpy blocks until the copy is done, so no extra sync is needed.
        ok = texture1dCheck(cudaMemcpy(h_odata, d_odata, sampleBytes, cudaMemcpyDeviceToHost),
                            "cudaMemcpy(h_odata)") &&
             texture1dCheck(cudaMemcpy(h_refdata, d_refdata, sampleBytes, cudaMemcpyDeviceToHost),
                            "cudaMemcpy(h_refdata)");
    }

    if (ok) {
        // Print all pairs for comparison.
        printf("for n=%d\n", n);
        for (int i = 0; i < n; i++) {
            printf("ref: %f , tex interpolation: %f \n", h_refdata[i], h_odata[i]);
        }

        // Write the same pairs to result.out.
        FILE* pFile2 = fopen("result.out", "w");
        if (pFile2 == NULL) {
            fprintf(stderr, "TEXTURE1D: could not open result.out for writing\n");
        } else {
            for (int i = 0; i < n; i++) {
                fprintf(pFile2, "ref: %f , tex interpolation: %f \n", h_refdata[i], h_odata[i]);
            }
            fclose(pFile2);
        }
    }

    // Release everything; each call is skipped while its pointer is still NULL.
    if (d_odata != NULL) {
        cudaFree(d_odata);
    }
    if (d_refdata != NULL) {
        cudaFree(d_refdata);
    }
    if (cuArray != NULL) {
        cudaFreeArray(cuArray);
    }
    free(h_odata);
    free(h_refdata);
}

[/codebox]

The output is as follows:

[codebox]ref: 0.000000 , tex interpolation: 0.000000

ref: 0.031250 , tex interpolation: 0.000000

ref: 0.062500 , tex interpolation: 0.000000

ref: 0.093750 , tex interpolation: 0.000000

ref: 0.125000 , tex interpolation: 0.000000

ref: 0.156250 , tex interpolation: 0.125000

ref: 0.187500 , tex interpolation: 0.250000

ref: 0.218750 , tex interpolation: 0.375000

ref: 0.250000 , tex interpolation: 0.500000

ref: 0.281250 , tex interpolation: 0.625000

ref: 0.312500 , tex interpolation: 0.750000

ref: 0.343750 , tex interpolation: 0.875000

ref: 0.375000 , tex interpolation: 1.000000

ref: 0.406250 , tex interpolation: 1.125000

ref: 0.437500 , tex interpolation: 1.250000

ref: 0.468750 , tex interpolation: 1.375000

ref: 0.500000 , tex interpolation: 1.500000

ref: 0.531250 , tex interpolation: 1.625000

ref: 0.562500 , tex interpolation: 1.750000

ref: 0.593750 , tex interpolation: 1.875000

ref: 0.625000 , tex interpolation: 2.000000

ref: 0.656250 , tex interpolation: 2.125000

ref: 0.687500 , tex interpolation: 2.250000

ref: 0.718750 , tex interpolation: 2.375000

ref: 0.750000 , tex interpolation: 2.500000

ref: 0.781250 , tex interpolation: 2.625000

ref: 0.812500 , tex interpolation: 2.750000

ref: 0.843750 , tex interpolation: 2.875000

ref: 0.875000 , tex interpolation: 3.000000

ref: 0.906250 , tex interpolation: 3.000000

ref: 0.937500 , tex interpolation: 3.000000

ref: 0.968750 , tex interpolation: 3.000000

[/codebox]

One immediate thing from a quick glance… texel values are stored at the texel centres, not the edges. I usually expect to see texture references with “x+0.5f” in them (or some appropriate rescaling), not just plain “x”. I believe there’s an Appendix in the programming guide about this.

You are right… i found it in the last part of the programming guide. Thank you so much for your reply…