I am trying to interpolate a 3d array, via Cuda texture memory. But getting weird values at its original coordinates.

I am trying to interpolate a 3D array. I am using cuda textures for its trilinear interpolation. I am not getting any error from ‘cuda-memcheck’. This code is working fine. But getting some weird answer also.
If I am passing value = 100 at (0,1,0), then its value should be same within an interpolated result.

#include <iostream>
#include <fstream>
#define NX 5
#define NY 5
#define NZ 1
#define IX 10
#define IY 10
#define IZ 2
#define ifact 2

texture<float, 3> tex;

__global__ void getInterpolatedFunctionValue(float *a, float *b, float *c,  float *result) // int IX, int IY, int IZ )
{
// Calculate normalized texture coordinates
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
unsigned int z = blockIdx.z * blockDim.z + threadIdx.z;

if ((x < IX) && (y < IY) && (z < IZ))
{

	float w = a[x] , v = b[y], u = c[z]  ;
	result[z * IZ * IY + y * IZ + x ] = tex3D(tex, u, v, w );
}
}

#define cudaCheckErrors(msg) \
    do { \
        cudaError_t __err = cudaGetLastError(); \
        if (__err != cudaSuccess) { \
            fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
                msg, cudaGetErrorString(__err), \
                __FILE__, __LINE__); \
            fprintf(stderr, "*** FAILED - ABORTING\n"); \
            exit(1); \
        } \
    } while (0)

using namespace std;

int main(){

int nx=NX, ny=NY, nz=NZ, xx =IX, yy = IY, zz = IZ;
float fff[nz][ny][nx];
float x[xx], y[yy], z[zz] ;

for(int ix=0; ix<nx; ix++)
  for(int iy=0; iy<ny; iy++)
    for(int iz=0; iz<nz; iz++){
      fff[iz][iy][ix] = (ix + iy + iz ) * 100;
	std::cout<<fff[iz][iy][ix] <<"    "<< ix<<"   "<< iy<<"   "<< iz<<'\n' ;
    }

for(int ix=0; ix<IX; ix++)
  for(int iy=0; iy<IY; iy++)
    for(int iz=0; iz<IZ; iz++){
		x[ix] = (float) ix / 2 ;
		y[iy] = (float) iy / 2 ;
		z[iz] = (float) iz / 2 ;
	//	std::cout<< x[ix] << "    "<< y[iy]<<"    "<< z[iz]<< '\n' ;
	}
	
//*************************************************************************//
float *d_x, *d_y, *d_z, *d_result, *h_result ;

cudaMalloc((void**)&d_x, IX * sizeof(float));
cudaMalloc((void**)&d_y, IY * sizeof(float));
cudaMalloc((void**)&d_z, IZ * sizeof(float));
cudaMalloc((void**)&d_result, IX * IY * IZ * sizeof(float));

cudaMemcpy( d_x, x, IX * sizeof(float), cudaMemcpyHostToDevice );
cudaMemcpy( d_y, y, IY * sizeof(float), cudaMemcpyHostToDevice );
cudaMemcpy( d_z, z, IZ * sizeof(float), cudaMemcpyHostToDevice );

cudaCheckErrors("allocating an array is failed");
//*************************************************************************//
	

cudaArray *d_volumeArray ;

//const cudaExtent extent = make_cudaExtent(nx, ny, nz);
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
cudaMalloc3DArray(&d_volumeArray, &channelDesc, make_cudaExtent(nz, ny, nx));
cudaCheckErrors("cudaMalloc3D error");

cudaMemcpy3DParms copyParams = {0};
copyParams.srcPtr   = make_cudaPitchedPtr((void*)fff, sizeof(float)*nz,ny,nx);
copyParams.dstArray = d_volumeArray;
copyParams.dstPos   = make_cudaPos(0,0,0);
copyParams.srcPos   = make_cudaPos(0,0,0);
copyParams.extent   = make_cudaExtent(nz, ny, nx);
copyParams.kind     = cudaMemcpyHostToDevice;
cudaCheckErrors("copyParams3D fail");
cudaMemcpy3D(&copyParams);
cudaCheckErrors("cudaMemcpy3DParms fail");

tex.normalized = false;
tex.filterMode = cudaFilterModeLinear;
tex.addressMode[0] = cudaAddressModeClamp;
tex.addressMode[1] = cudaAddressModeClamp;
tex.addressMode[2] = cudaAddressModeClamp;

cudaBindTextureToArray(tex, d_volumeArray, channelDesc);
cudaCheckErrors("bind fail");	

const dim3 blockSize(8, 8, 8 );
const dim3 gridSize(((IZ + blockSize.x )/blockSize.x),((IY + blockSize.y )/blockSize.y),((IX + blockSize.z)/blockSize.z));

printf("Blocksize.x = %i, blockSize.y = %i, blockSize.z = %i \n", blockSize.x, blockSize.y, blockSize.z);
printf("gridSize.x = %i, gridSize.y = %i, gridSize.z = %i \n", gridSize.x, gridSize.y, gridSize.z);

getInterpolatedFunctionValue<<<gridSize, blockSize>>>(d_x, d_y, d_z, d_result) ; // IX , IY , IZ ) ;

cudaCheckErrors("kernel fail");
cudaDeviceSynchronize();	
h_result = (float*) malloc(IX * IY * IZ * sizeof(float)) ;
cudaMemcpy( h_result, d_result, IX * IY * IZ * sizeof(float),cudaMemcpyDeviceToHost);		
cudaCheckErrors("cudaMemcpy fail");
  
cudaUnbindTexture(tex);
cudaCheckErrors("unbind fail");

cudaCheckErrors("cudaFree fail");
cudaFreeArray(d_volumeArray);
cudaCheckErrors("free fail");

printf("success!\n");

for(int ix=0; ix<IX; ix++)
  for(int iy=0; iy<IY; iy++)
    for(int iz=0; iz<IZ; iz++){
		std::cout<<h_result[iz * IZ * IY + iy * IZ + ix ] << "    "<< x[ix] << "    "<< y[iy]<<"    "<< z[iz]<< '\n' ;
	}

return 0;
}

Its result after cuda-memcheck

========= CUDA-MEMCHECK
0    0   0   0
100    0   1   0
200    0   2   0
300    0   3   0
400    0   4   0
100    1   0   0
200    1   1   0
300    1   2   0
400    1   3   0
500    1   4   0
200    2   0   0
300    2   1   0
400    2   2   0
500    2   3   0
600    2   4   0
300    3   0   0
400    3   1   0
500    3   2   0
600    3   3   0
700    3   4   0
400    4   0   0
500    4   1   0
600    4   2   0
700    4   3   0
800    4   4   0
Blocksize.x = 8, blockSize.y = 8, blockSize.z = 8 
gridSize.x = 1, gridSize.y = 2, gridSize.z = 2 
success!
0    0    0    0
0    0    0    0.5
0    0    0.5    0
0    0    0.5    0.5
150    0    1    0
150    0    1    0.5
150    0    1.5    0
150    0    1.5    0.5
200    0    2    0
200    0    2    0.5
250    0    2.5    0
250    0    2.5    0.5
350    0    3    0
350    0    3    0.5
350    0    3.5    0
350    0    3.5    0.5
350    0    4    0
400    0    4    0.5
400    0    4.5    0
450    0    4.5    0.5
0    0.5    0    0
0    0.5    0    0.5
0    0.5    0.5    0
0    0.5    0.5    0.5
200    0.5    1    0
200    0.5    1    0.5
200    0.5    1.5    0
200    0.5    1.5    0.5
250    0.5    2    0
250    0.5    2    0.5
300    0.5    2.5    0
300    0.5    2.5    0.5
400    0.5    3    0
400    0.5    3    0.5
400    0.5    3.5    0
400    0.5    3.5    0.5
350    0.5    4    0
450    0.5    4    0.5
400    0.5    4.5    0
500    0.5    4.5    0.5
0    1    0    0
0    1    0    0.5
150    1    0.5    0
150    1    0.5    0.5
150    1    1    0
150    1    1    0.5
200    1    1.5    0
200    1    1.5    0.5
250    1    2    0
250    1    2    0.5
350    1    2.5    0
350    1    2.5    0.5
350    1    3    0
350    1    3    0.5
350    1    3.5    0
400    1    3.5    0.5
400    1    4    0
450    1    4    0.5
0    1    4.5    0
550    1    4.5    0.5
0    1.5    0    0
0    1.5    0    0.5
200    1.5    0.5    0
200    1.5    0.5    0.5
200    1.5    1    0
200    1.5    1    0.5
250    1.5    1.5    0
250    1.5    1.5    0.5
300    1.5    2    0
300    1.5    2    0.5
400    1.5    2.5    0
400    1.5    2.5    0.5
400    1.5    3    0
400    1.5    3    0.5
350    1.5    3.5    0
450    1.5    3.5    0.5
400    1.5    4    0
500    1.5    4    0.5
0    1.5    4.5    0
600    1.5    4.5    0.5
150    2    0    0
150    2    0    0.5
150    2    0.5    0
150    2    0.5    0.5
200    2    1    0
200    2    1    0.5
250    2    1.5    0
250    2    1.5    0.5
350    2    2    0
350    2    2    0.5
350    2    2.5    0
350    2    2.5    0.5
350    2    3    0
400    2    3    0.5
400    2    3.5    0
450    2    3.5    0.5
0    2    4    0
550    2    4    0.5
0    2    4.5    0
550    2    4.5    0.5
200    2.5    0    0
200    2.5    0    0.5
200    2.5    0.5    0
200    2.5    0.5    0.5
250    2.5    1    0
250    2.5    1    0.5
300    2.5    1.5    0
300    2.5    1.5    0.5
400    2.5    2    0
400    2.5    2    0.5
400    2.5    2.5    0
400    2.5    2.5    0.5
350    2.5    3    0
450    2.5    3    0.5
400    2.5    3.5    0
500    2.5    3.5    0.5
0    2.5    4    0
600    2.5    4    0.5
0    2.5    4.5    0
600    2.5    4.5    0.5
150    3    0    0
150    3    0    0.5
200    3    0.5    0
200    3    0.5    0.5
250    3    1    0
250    3    1    0.5
350    3    1.5    0
350    3    1.5    0.5
350    3    2    0
350    3    2    0.5
350    3    2.5    0
400    3    2.5    0.5
400    3    3    0
450    3    3    0.5
0    3    3.5    0
550    3    3.5    0.5
0    3    4    0
550    3    4    0.5
150    3    4.5    0
650    3    4.5    0.5
200    3.5    0    0
200    3.5    0    0.5
250    3.5    0.5    0
250    3.5    0.5    0.5
300    3.5    1    0
300    3.5    1    0.5
400    3.5    1.5    0
400    3.5    1.5    0.5
400    3.5    2    0
400    3.5    2    0.5
350    3.5    2.5    0
450    3.5    2.5    0.5
400    3.5    3    0
500    3.5    3    0.5
0    3.5    3.5    0
600    3.5    3.5    0.5
0    3.5    4    0
600    3.5    4    0.5
200    3.5    4.5    0
700    3.5    4.5    0.5
200    4    0    0
200    4    0    0.5
250    4    0.5    0
250    4    0.5    0.5
350    4    1    0
350    4    1    0.5
350    4    1.5    0
350    4    1.5    0.5
350    4    2    0
400    4    2    0.5
400    4    2.5    0
450    4    2.5    0.5
0    4    3    0
550    4    3    0.5
0    4    3.5    0
550    4    3.5    0.5
150    4    4    0
650    4    4    0.5
150    4    4.5    0
0    4    4.5    0.5
250    4.5    0    0
250    4.5    0    0.5
300    4.5    0.5    0
300    4.5    0.5    0.5
400    4.5    1    0
400    4.5    1    0.5
400    4.5    1.5    0
400    4.5    1.5    0.5
350    4.5    2    0
450    4.5    2    0.5
400    4.5    2.5    0
500    4.5    2.5    0.5
0    4.5    3    0
600    4.5    3    0.5
0    4.5    3.5    0
600    4.5    3.5    0.5
200    4.5    4    0
700    4.5    4    0.5
200    4.5    4.5    0
0    4.5    4.5    0.5
========= CUDA-MEMCHECK
========= ERROR SUMMARY: 0 errors

Why am I getting value = 0 in output?
Where is my mistake and how could find my mistake?

You could study a code that works, such as any of the CUDA sample codes that do 3D texturing, or this one:

https://stackoverflow.com/questions/25591045/cuda-3d-texture-interpolation/25598107#25598107

You could also check your usage of each API function against the reference manual. For example, to pick one example, you have this:

copyParams.srcPtr   = make_cudaPitchedPtr((void*)fff, sizeof(float)*nz,ny,nx);
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Here’s the reference manual entry for make_cudaPitchedPtr:

https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1gc3f66f8f11f9949768ae8d10cad5a1a0

(hint: your usage is not correct)

I’m not suggesting that if you fix this your code will suddenly start working. I’m offering some suggestions for you how you might go about fixing your code.