interpreting memcheck errors

I am new to cuda programming and I am having trouble interpreting cuda-memcheck errors. Below is the output of cuda-memcheck. I am having trouble understanding the error description. cufftExecD2Z is called in getmcf.cu, normOTFc is called as a separate kernel with threads (64) and blocks(64,128). This error message makes it look like cufftExecD2Z is called from within normOTFc in the module getmcf.cu which is not the case.

I am using Cuda compilation tools, release 4.0, V0.2.1221 on CentOS release 5.7

CUDA error: cufftExecD2Z error========= Invalid global read of size 8
========= at 0x00001000 in getmcf.cu:2793:normOTFc
========= by thread (0,0,0) in block (0,0,61453)
========= Address 0x2aaaabad4810 is out of bounds

========= ERROR SUMMARY: 1 error

Here is my kernel normOTFc

global void normOTFc(double * dOTFc, double arrsz )
{
double tmpOTFc1;
double r;
long x, y, z;
int tid = threadIdx.x + blockIdx.x * blockDim.x + gridDim.x * blockIdx.y * blockDim.x;
if ( blockIdx.y < gridDim.y/2L +1L)
{
z = blockIdx.y;
}
else {
z = gridDim.y - blockIdx.y;
}
if ( blockIdx.x < gridDim.x/2L +1L)
{
y = blockIdx.x;
}
else {
y = gridDim.x - blockIdx.x;
}
if ( threadIdx.x < blockDim.x/2L +1L)
{
x = threadIdx.x;
}
else {
x = blockDim.x - threadIdx.x;
}
r = x
x + yy + zz;
tmpOTFc1 = dOTFc + tid;
*tmpOTFc1 = tmpOTFc1 * (arrszexp(-r/100.0));
}

Here is my printf(“\n”);
cudaMemGetInfo(&free,&total);
printf(“%d Before planr reverse fft KB free of total %d KB\n”,free/1024,total/1024);
cufftPlan3d(&planr, Nz, Ny, Nx,CUFFT_Z2D);

    if (cufftExecZ2D(planr,  devPtrOTFc,  (double *) devPtrOTFc) !=  (cufftResult) CUDA_SUCCESS )

{ printf(“CUDA error: cufftExecZ2D error”);
return 1;
}
destroy_fft_double_plan(planr);
printf(“\n”);
cudaMemGetInfo(&free,&total);
printf(“%d After Planr destroy KB free of total %d KB\n”,free/1024,total/1024);//
normOTFc<<<blocks,threads>>>((double *) OTFc, (double) arrsz );
//

    cufftPlan3d(&planf, Nz, Ny, Nx,CUFFT_D2Z);

printf(“\n”);
cudaMemGetInfo(&free,&total);
printf(“%d After Planf before forward KB free of total %d KB\n”,free/1024,total/1024);
if (cufftExecD2Z(planf, (double *) devPtrOTFc, devPtrOTFc) != (cufftResult) CUDA_SUCCESS)
{ printf (“CUDA error: cufftExecD2Z error”);
return 1;
}
destroy_fft_double_plan(planf);

    if (cudaMemcpy( OTFc, devPtrOTFc, sizeof( double) * Nz * Ny * (Nx+2L),  cudaMemcpyDeviceToHost) != (cudaError_t) CUDA_SUCCESS)

{
fprintf ( stderr , “cudaMemcpy error : ExecC2C Forward failed” ) ;
return 1;cod segment.