texture memory access code Please Help

I have a very simple kernel to copy data from the texture to linear device memory at d2_tex. However, I get unexpected results. This being my first code, I am pretty sure I am missing something. Please help. Data stored in host memory at h_tex is copied to d_tex in device memory. Texture reference tex is bound to linear memory d_tex and this is copied to d2_tex in the kernel. The code below should print 32 5s followed by 32 10s. However, it prints 64 5s.

#include <stdio.h>
#include <stdlib.h>
#include <cutil.h>

texture<int, 1, cudaReadModeElementType> tex;
global void TextureCopyKernel(int*,int*);
int main ()
{
int d_tex=NULL, d2_tex=NULL;
int size = 64
sizeof(int);
int h_tex[64],h2_tex[64];
for (int i=0;i<=63;i++)
{
h_tex[i]=(i<32)?5:10; // assigning data to the hosr memory
}
cudaMalloc((void
*) &d_tex, size);
cudaMalloc((void**) &d2_tex, size);
cudaMemcpy(d_tex, h_tex, size, cudaMemcpyHostToDevice);
cudaBindTexture(0, tex, d_tex, size);

int dimGrid = 2;
int dimBlock = 32;

TextureCopyKernel<<<dimGrid, dimBlock>>>(d_tex, d2_tex);
cudaMemcpy(h2_tex, d2_tex, size, cudaMemcpyDeviceToHost);

for (int i=0;i<64;i++)
{
printf(“\n %d \n”,h2_tex[i]);
}
cudaFree(d_tex);
cudaFree(d2_tex);
}

global void TextureCopyKernel( int *g_idata, int g_odata)
{
// calculate normalized texture coordinates
unsigned int x = blockIdx.x
blockDim.x + threadIdx.x;
if (x<32)
{
g_odata=tex1Dfetch(tex,x);
}
}

Why “if (x < 32)”???

in this case

"if (x < 64) {

//

}"

because of your declearation, x will gets the value from 0 to (2 * 32 - 1) = 63