I am starting to learn to use CUDA textures to do interpolations. However, I continue to get incorrect results… I know that the mistake should be obvious and stupid…but i just can’t find it with my naked eye External Image
It is a very simple 1D linear interpolation…but i keep getting undesirable outputs…i.e. tex1D(texRef, x) doesn’t span fully from 0 to 1, but from 0.125 to 0.875… outputs of x less than 0.125 and 0.875 are clamped
here is the source code
[codebox]
#include<stdio.h>
//defining texture reference
texture<float,1,cudaReadModeElementType> texRef;
//texture<float,1,cudaReadModeNormalizedFloat> texRef;
global static void texTest(int n, float* d_odata, float* d_refdata)
{
int tid=threadIdx.x;
float x=((float)tid)/(float)n;
d_refdata[tid]=x;
d_odata[tid]=tex1D(texRef,x);
}
extern “C” void TEXTURE1D(int argc, char* argv)
{
const int n=32; //number of points
//create data, output and reference arrays
float h_data[8];
h_data[0]=0.0;
h_data[1]=1.0;
h_data[2]=2.0;
h_data[3]=3.0;
float* h_odata=NULL; float* h_refdata=NULL;
float* d_odata=NULL; float* d_refdata=NULL;
//allocate device memory
cudaMalloc((void**) &d_odata, sizeof(float) * n);
cudaMalloc((void**) &d_refdata, sizeof(float) * n);
//create channel descripiton
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc();
cudaArray* cuArray=NULL;
//allocate h_data in cuArray
cudaMallocArray( &cuArray, &channelDesc, 4, 1 );
cudaMemcpyToArray( cuArray, 0, 0, h_data, 4*sizeof(float), cudaMemcpyHostToDevice);
//set some additional parameters of texture
texRef.addressMode[0] = cudaAddressModeClamp;
texRef.filterMode = cudaFilterModeLinear;
texRef.normalized = true;
// Bind the array to the texture
cudaBindTextureToArray( texRef, cuArray, channelDesc);
dim3 blockSize(n,1,1);
dim3 gridSize(1,1,1);
//kernel call
texTest<<<gridSize, blockSize, 0>>>(n,d_odata,d_refdata);
cudaThreadSynchronize() ;
//malloc host arrays
h_odata = (float*) malloc(sizeof(float) * n);
h_refdata = (float*) malloc(sizeof(float) * n);
//copy odata and refdata from dev to host
cudaMemcpy(h_odata, d_odata, sizeof(float) * n, cudaMemcpyDeviceToHost);
cudaMemcpy(h_refdata, d_refdata, sizeof(float) * n, cudaMemcpyDeviceToHost);
cudaThreadSynchronize() ;
//print all to compare
printf(“for n=%d\n”,n);
for(int i=0; i<n; i++) {
printf(“ref: %f , tex interpolation: %f \n”, h_refdata[i],h_odata[i]);
}
FILE *pFile2;
pFile2 = fopen (“result.out”,“w”);
for(int i=0; i<n; i++) {
fprintf(pFile2, "ref: %f , tex interpolation: %f \n", h_refdata[i],h_odata[i]);
}
fclose (pFile2);
cudaFree(d_odata);
cudaFree(d_refdata);
cudaFreeArray(cuArray);
//free(h_data);
free(h_odata);
free(h_refdata);
}
[/codebox]
The output is as follows:
[codebox]ref: 0.000000 , tex interpolation: 0.000000
ref: 0.031250 , tex interpolation: 0.000000
ref: 0.062500 , tex interpolation: 0.000000
ref: 0.093750 , tex interpolation: 0.000000
ref: 0.125000 , tex interpolation: 0.000000
ref: 0.156250 , tex interpolation: 0.125000
ref: 0.187500 , tex interpolation: 0.250000
ref: 0.218750 , tex interpolation: 0.375000
ref: 0.250000 , tex interpolation: 0.500000
ref: 0.281250 , tex interpolation: 0.625000
ref: 0.312500 , tex interpolation: 0.750000
ref: 0.343750 , tex interpolation: 0.875000
ref: 0.375000 , tex interpolation: 1.000000
ref: 0.406250 , tex interpolation: 1.125000
ref: 0.437500 , tex interpolation: 1.250000
ref: 0.468750 , tex interpolation: 1.375000
ref: 0.500000 , tex interpolation: 1.500000
ref: 0.531250 , tex interpolation: 1.625000
ref: 0.562500 , tex interpolation: 1.750000
ref: 0.593750 , tex interpolation: 1.875000
ref: 0.625000 , tex interpolation: 2.000000
ref: 0.656250 , tex interpolation: 2.125000
ref: 0.687500 , tex interpolation: 2.250000
ref: 0.718750 , tex interpolation: 2.375000
ref: 0.750000 , tex interpolation: 2.500000
ref: 0.781250 , tex interpolation: 2.625000
ref: 0.812500 , tex interpolation: 2.750000
ref: 0.843750 , tex interpolation: 2.875000
ref: 0.875000 , tex interpolation: 3.000000
ref: 0.906250 , tex interpolation: 3.000000
ref: 0.937500 , tex interpolation: 3.000000
ref: 0.968750 , tex interpolation: 3.000000
[/codebox]