I’m having problem understaing how to use interpolation. I’ve written a small program:
there’s an array of size 2
float h_data[2];
h_data[0]=0.0;
h_data[1]=1.0;
and I’m trying to get values from n boints between those two elements. So, for example for n=4 I’m looking for values corresponding to
data[0.0]
data[0.25]
data[0.5]
data[0.75]
of course using linear interpolation.
I must be doing something wrong, cause the results are as follows:
CUDA initialized.
for n=20
ref: 0.000000 , tex interpolation: 0.000000
ref: 0.050000 , tex interpolation: 0.000000
ref: 0.100000 , tex interpolation: 0.000000
ref: 0.150000 , tex interpolation: 0.000000
ref: 0.200000 , tex interpolation: 0.000000
ref: 0.250000 , tex interpolation: 0.000000
ref: 0.300000 , tex interpolation: 0.000000
ref: 0.350000 , tex interpolation: 0.000000
ref: 0.400000 , tex interpolation: 0.000000
ref: 0.450000 , tex interpolation: 0.050000
ref: 0.500000 , tex interpolation: 0.100000
ref: 0.550000 , tex interpolation: 0.150000
ref: 0.600000 , tex interpolation: 0.200000
ref: 0.650000 , tex interpolation: 0.250000
ref: 0.700000 , tex interpolation: 0.300000
ref: 0.750000 , tex interpolation: 0.350000
ref: 0.800000 , tex interpolation: 0.400000
ref: 0.850000 , tex interpolation: 0.450000
ref: 0.900000 , tex interpolation: 0.500000
ref: 0.950000 , tex interpolation: 0.550000
Here’s the code:
kernel:
__global__ static void texTest(int n, float* d_odata, float* d_refdata)
{
int tid=threadIdx.x;
float x=((float)tid)/n;
d_refdata[tid]=x;
d_odata[tid]=tex1D(texRef,x);
}
texture reference:
//defining texture reference
texture<float,1,cudaReadModeElementType> texRef;
main:
int main(int argc, char* argv[])
{
if(!InitCUDA()) {
return 0;
}
const int n=20; //number of points
//create data, output and reference arrays
float h_data[2];
h_data[0]=0.0;
h_data[1]=1.0;
float* h_odata=NULL; float* h_refdata=NULL;
float* d_odata=NULL; float* d_refdata=NULL;
//allocate device memory
CUDA_SAFE_CALL( cudaMalloc((void**) &d_odata, sizeof(float) * n));
CUDA_SAFE_CALL( cudaMalloc((void**) &d_refdata, sizeof(float) * n));
//create channel descripiton
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
cudaArray* cuArray=NULL;
//allocate h_data in cuArray
CUDA_SAFE_CALL( cudaMallocArray( &cuArray, &channelDesc, 2, 1 ));
CUDA_SAFE_CALL( cudaMemcpyToArray( cuArray, 0, 0, h_data, 2*sizeof(float), cudaMemcpyHostToDevice));
//set some additional parameters of texture
texRef.addressMode[0] = cudaAddressModeClamp;
texRef.filterMode = cudaFilterModeLinear;
texRef.normalized = false;
// Bind the array to the texture
CUDA_SAFE_CALL( cudaBindTextureToArray( texRef, cuArray, channelDesc));
dim3 blockSize(n,1,1);
dim3 gridSize(1,1,1);
//kernel call
texTest<<<gridSize, blockSize, 0>>>(n,d_odata,d_refdata);
CUDA_SAFE_CALL( cudaThreadSynchronize() );
//malloc host arrays
h_odata = (float*) malloc(n);
h_refdata = (float*) malloc(n);
//copy odata and refdata from dev to host
CUDA_SAFE_CALL( cudaMemcpy(h_odata, d_odata, sizeof(float) * n, cudaMemcpyDeviceToHost));
CUDA_SAFE_CALL( cudaMemcpy(h_refdata, d_refdata, sizeof(float) * n, cudaMemcpyDeviceToHost));
CUDA_SAFE_CALL( cudaThreadSynchronize() );
//print all to compare
printf("for n=%d\n",n);
for(int i=0; i<n; i++) {
printf("ref: %f , tex interpolation: %f \n", h_refdata[i],h_odata[i]);
}
CUDA_SAFE_CALL( cudaFree(d_odata));
CUDA_SAFE_CALL( cudaFree(d_refdata));
CUDA_SAFE_CALL( cudaFreeArray(cuArray));
free(h_data);
free(h_odata);
free(h_refdata);
return 0;
}
Also, the manual says this interpolation is “low precision”. How low exactly?