problem in using 2d texture fetch I am facing problem in how to get normalized values

Hi all

I want to use linear interpolation in 2d using texture memory in cuda

I want to interpolate
float p_input[17]= {10.8,8.3,7.3,7.2,8,9.9,10.3,9,7.2,8.9,12.42,12.1,10.8,8.3,7.3,7.2,8};
with 30 interval points
the output should be ( 200,360) 2d array

can anybody tell me where i am doing wrong
hear is my code

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <errno.h>
#include <err.h>
#include <sysexits.h>
#include <sys/types.h>
#include <sys/time.h>
#include <cuda.h>
#include <cutil_inline_runtime.h>
#include<cutil_math.h>
#define numrow 20
#define numcol 36
#include “cuPrintf.cu”

#define WIND_INTERVAL 30

texture<float, 2, cudaReadModeElementType> texRef;

global void computeInterp(int numInter,float* pInterp)

{
// Calculate normalized texture coordinates
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;//for col
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;//for row

// if ( x >= numcol || y >= numrow)
// {
// return;
// }
//printf(“x=%d,y=%d\n”,x,y);
int loc=0;
loc = numcolx+y;
if(loc<=numcol
numrow)
{
float tu=((float)(2.5f + 1.0f/WIND_INTERVAL + (float)x/WIND_INTERVAL));
float tv=((float)(2.5f + 1.0f/WIND_INTERVAL + (float)y/WIND_INTERVAL));

// Read from texture and write to global memory

pInterp[loc]=tex2D(texRef, y+0.5f, x+0.5f);
printf(“value of tu=%f,tv=%f,pInterp=%1.15f\n”,tu,tv,tex2D(texRef, tv, tu));
}
}
// Host code
int main()
{

 float p_WindDirection [17] ={-60,-30,0,30,60,90,120,150,180,210,240,270,300,330,360,390,420};
 float p_WindWeibullA[17]= {10.8,8.3,7.3,7.2,8,9.9,10.3,9,7.2,8.9,12.42,12.1,10.8,8.3,7.3,7.2,8};
 
    int num_wind_dir = 17;
    int num_wei_a = 17;
    int num_wei_k = 17;
    int num_wei_freq = 17;
    int num_wind_inter = 17;
    int num_wsv = 23;

    float *p_dev_AInterp;
    float *p_dev_KInterp;
    float *p_dev_FreqInterp;

float p_AInterp = (float)malloc(numrownumcolsizeof(float));
double *p_dev_WindSpeedVector;
// double *p_dev_WindHwTime;

    cutilSafeCall(cudaMalloc((void**)&p_dev_AInterp, numrow*numcol*sizeof(float)));
    cutilSafeCall(cudaMalloc((void**)&p_dev_KInterp, numrow*numcol*sizeof(float)));
    cutilSafeCall(cudaMalloc((void**)&p_dev_FreqInterp,numrow*numcol*sizeof(float)));
    cutilSafeCall(cudaMalloc((void**)&p_dev_WindSpeedVector, num_wsv*sizeof(double)));
//    cutilSafeCall(cudaMalloc((void**)&p_dev_WindHwTime, num_wind_inter*num_wsv*sizeof(double)));
    cudaMemcpy(p_dev_WindSpeedVector, p_WindSpeedVector, num_wsv*sizeof(double),cudaMemcpyHostToDevice);
 //   cudaMemset(p_dev_WindHwTime, 0, num_wind_inter*sizeof(double));

dim3 grid(30,30);
dim3 block(16,16);

// Allocate CUDA array in device memory
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
cudaArray* cuArray;
cudaMallocArray(&cuArray, &texRef.channelDesc,num_wei_a, 1);

// Copy to device memory some data located at address h_data
// in host memory
// Weibull_A
cudaMemcpyToArray(cuArray, 0, 0, p_WindWeibullA, sizeof(float)*num_wei_a, cudaMemcpyHostToDevice);
// Bind the array to the texture reference
cudaBindTextureToArray(texRef, cuArray);
// Set texture parameters

texRef.filterMode = cudaFilterModeLinear;
texRef.normalized = true;

computeInterp<<<grid,block>>>(num_wind_inter, p_dev_AInterp);

cudaUnbindTexture(texRef);

    cudaMemcpy(p_AInterp, p_dev_AInterp, 20*36*sizeof(float), cudaMemcpyDeviceToHost);

float a_interp[20][36];

    cudaFree(p_dev_AInterp);
     cudaFreeArray(cuArray);

}

The output is coming as 8.000000
for every values of tu,and tv

can anybody help me to get my required output