I don’t understand why the initial array is interpolated, giving an output in which the initial values have been shifted by 0.5.
I am trying to bind the initial array cuArray as a texture and, for the moment, simply read it back into an output array, but as I said, the cuArray values come back interpolated. Why is this, and how do I stop it?
[codebox]#define NTHREADS 128
#include <stdio.h>
// 1D texture reference over float texels; cudaReadModeElementType means a
// fetch returns the stored float without any integer-to-float conversion.
texture<float, 1, cudaReadModeElementType> tex;
// Forward declaration of the kernel launched from main(). A function must be
// qualified __global__ to be launchable with the <<<...>>> syntax.
__global__ void kernel(float *d_out);
//============================
// Evaluates a CUDA runtime call; on failure prints the error together with
// its source location and jumps to the enclosing function's `cleanup` label.
#define CUDA_TRY(call)                                                      \
    do {                                                                    \
        cudaError_t err_ = (call);                                          \
        if (err_ != cudaSuccess) {                                          \
            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__,          \
                    __LINE__, cudaGetErrorString(err_));                    \
            goto cleanup;                                                   \
        }                                                                   \
    } while (0)

// Fills a 1D CUDA array with the values 0 .. NTHREADS-1, binds it to the
// texture reference `tex`, launches `kernel` with one block of NTHREADS
// threads and prints the value each thread fetched, one per line.
// Returns 0 on success and 1 if a host allocation or any CUDA call fails.
int main()
{
    // Everything is declared (and initialised) up front so that the
    // `goto cleanup` inside CUDA_TRY never jumps over an initialisation.
    const size_t nbytes = sizeof(float) * NTHREADS;
    float *d_out = NULL;        // device buffer written by the kernel
    cudaArray *cuArray = NULL;  // device array backing the texture
    float *data = NULL;         // host input values
    float *h_out = NULL;        // host copy of the kernel output
    bool texBound = false;
    int status = 1;

    // create memory for output from the device
    CUDA_TRY(cudaMalloc((void**)&d_out, nbytes));

    // create an array on the device
    CUDA_TRY(cudaMallocArray(&cuArray, &tex.channelDesc, NTHREADS, 1));

    // Texture attributes are configured before binding.
    // Linear filtering blends the two texels nearest the fetch coordinate.
    // With unnormalized coordinates texel i is centered at i + 0.5, so only a
    // fetch at i + 0.5 returns data[i] unblended. Use cudaFilterModePoint
    // instead if no interpolation is wanted at all.
    tex.filterMode = cudaFilterModeLinear;
    CUDA_TRY(cudaBindTextureToArray(tex, cuArray));
    texBound = true;

    // create and fill data array with increasing values ie thread id
    data = (float*)malloc(nbytes);
    if (data == NULL) {
        fprintf(stderr, "host allocation of %lu bytes failed\n",
                (unsigned long)nbytes);
        goto cleanup;
    }
    for (int i = 0; i < NTHREADS; i++) data[i] = float(i);
    CUDA_TRY(cudaMemcpyToArray(cuArray, 0, 0, data, nbytes,
                               cudaMemcpyHostToDevice));

    kernel<<<1, NTHREADS>>>(d_out);
    // A kernel launch returns nothing; launch-configuration errors are only
    // visible through cudaGetLastError().
    CUDA_TRY(cudaGetLastError());

    // create array to hold output on the host and copy output into it
    h_out = (float*)malloc(nbytes);
    if (h_out == NULL) {
        fprintf(stderr, "host allocation of %lu bytes failed\n",
                (unsigned long)nbytes);
        goto cleanup;
    }
    // The blocking copy also waits for the kernel, so execution errors
    // raised inside the kernel are reported here.
    CUDA_TRY(cudaMemcpy(h_out, d_out, nbytes, cudaMemcpyDeviceToHost));

    for (int i = 0; i < NTHREADS; i++) printf("%f\n", h_out[i]);
    status = 0;

cleanup:
    // Release whatever was acquired, on both the success and failure paths.
    if (texBound) cudaUnbindTexture(tex);
    free(h_out);
    free(data);
    if (cuArray != NULL) cudaFreeArray(cuArray);
    if (d_out != NULL) cudaFree(d_out);
    return status;
}
//===============================
// Copies texel threadIdx.x of the texture bound to `tex` into
// d_out[threadIdx.x].
//
// Launch layout: only threadIdx.x is used as the index, so this is meant for
// a single 1D block; d_out must hold at least blockDim.x floats.
__global__ void kernel(float *d_out)
{
    int threadid = threadIdx.x;
    // With unnormalized coordinates, texel i covers [i, i + 1) and its centre
    // is at i + 0.5. Under linear filtering a fetch at the integer coordinate
    // i lies halfway between texels i - 1 and i and returns their average
    // (the "shift by 0.5"). Sampling at the texel centre returns the stored
    // value exactly, and also selects texel i under point filtering.
    float x = tex1D(tex, float(threadid) + 0.5f);
    d_out[threadid] = x;
}
[/codebox]