Problem in Code

Dear All

In the code below, when I printf "comp1" inside "block1" it gives wrong values, and it does so consistently on both a GeForce 740M and a K40. I have a working program to compare against. If I print it in "mult1" the values are OK, so it seems that the inverse FFT is not working.

Any clues?

Thanks

Luis Gonçalves

***** Host Program **********************

	   // ---- Host side: frequency-domain filtering of comp1 on stream[z5] ----
	   // Pipeline: forward FFT -> multiply by rrcduo (mult1) -> inverse FFT ->
	   // scale by 1/NRSAMPLES (scale1) -> block1.
	   //
	   // cuFFT transforms are unnormalized: a forward transform followed by an
	   // inverse transform returns the input multiplied by the transform length.
	   // Unlike MATLAB's ifft, the 1/N factor has to be applied by the caller,
	   // which is what the scale1 launch below does.
	   //
	   // NOTE(review): plan[1]'s stream is not set in this excerpt; the stream
	   // synchronizations are kept exactly where the original had them.
	   if (cufftExecZ2Z(plan[1], (cufftDoubleComplex *)comp1,
	                    (cufftDoubleComplex *)comp1, CUFFT_FORWARD) != CUFFT_SUCCESS)
	       fprintf(stderr, "cufftExecZ2Z (forward) failed\n");
	   if (cudaStreamSynchronize(stream[z5]) != cudaSuccess)
	       fprintf(stderr, "stream sync after forward FFT failed: %s\n",
	               cudaGetErrorString(cudaGetLastError()));

	   // Grid is rounded up so that every sample is covered even when NRSAMPLES
	   // is not a multiple of 32; mult1 bounds-checks its index.
	   mult1<<<((NRSAMPLES) + 31) / 32, 32, 0, stream[z5]>>>(rrcduo, comp1, z5);
	   if (cudaPeekAtLastError() != cudaSuccess)
	       fprintf(stderr, "mult1 launch failed: %s\n",
	               cudaGetErrorString(cudaGetLastError()));

	   // NOTE(review): if this fragment runs once per z5, plan[2] is re-created
	   // on every pass without a cufftDestroy -- consider creating it once.
	   if (cufftPlan1d(&plan[2], NRSAMPLES, CUFFT_Z2Z, 1) != CUFFT_SUCCESS)
	       fprintf(stderr, "cufftPlan1d (plan[2]) failed\n");
	   if (cufftSetStream(plan[2], stream[z5]) != CUFFT_SUCCESS)
	       fprintf(stderr, "cufftSetStream (plan[2]) failed\n");
	   if (cudaStreamSynchronize(stream[z5]) != cudaSuccess)
	       fprintf(stderr, "stream sync after mult1 failed: %s\n",
	               cudaGetErrorString(cudaGetLastError()));

	   if (cufftExecZ2Z(plan[2], (cufftDoubleComplex *)comp1,
	                    (cufftDoubleComplex *)comp1, CUFFT_INVERSE) != CUFFT_SUCCESS)
	       fprintf(stderr, "cufftExecZ2Z (inverse) failed\n");

	   // Apply the 1/N normalization that cuFFT leaves to the caller. Issued on
	   // the same stream as plan[2], so it runs after the inverse transform.
	   scale1<<<((NRSAMPLES) + 31) / 32, 32, 0, stream[z5]>>>(
	       comp1, NRSAMPLES, 1.0 / (double)(NRSAMPLES));
	   if (cudaPeekAtLastError() != cudaSuccess)
	       fprintf(stderr, "scale1 launch failed: %s\n",
	               cudaGetErrorString(cudaGetLastError()));
	   if (cudaStreamSynchronize(stream[z5]) != cudaSuccess)
	       fprintf(stderr, "stream sync after inverse FFT failed: %s\n",
	               cudaGetErrorString(cudaGetLastError()));

	   // block1's launch configuration is unchanged: rounding its grid up is
	   // only safe if block1 bounds-checks its thread index.
	   block1<<<NRSAMPLES/32,32,0,stream[z5]>>>(nant, ntaps,nusers,g,tapreal,tapimag,delaytran,comp1,timeuser+NRSAMPLES*(z5*nant));
	   if (cudaPeekAtLastError() != cudaSuccess)
	       fprintf(stderr, "block1 launch failed: %s\n",
	               cudaGetErrorString(cudaGetLastError()));

/*
 * mult1: in-place element-wise complex multiplication, comp1[k] *= rrcduo[k].
 *
 * Launch layout: 1-D grid of 1-D blocks, one thread per element; threads whose
 * index is >= NRSAMPLES do nothing.
 *
 *   rrcduo  complex factors, read only
 *   comp1   complex data, overwritten with the product
 *   z5      only used by the optional debug print below
 */
__global__ void mult1(complex1 *rrcduo,complex1 *comp1,int z5)
{
    int i1 = blockIdx.x * blockDim.x + threadIdx.x;
    complex1 x, h;

    if (i1 >= NRSAMPLES)
        return;   // grid tail: nothing to do

    /* Debug aid: print one element before the multiplication.
       if ((z5==0) && (i1==500))
           printf("%lf %lf\n",comp1[500].r,comp1[500].i);
    */

    // Copy both operands first: comp1[i1] is overwritten below and both output
    // components need the original real and imaginary parts.
    x.r = comp1[i1].r;
    x.i = comp1[i1].i;
    h.r = rrcduo[i1].r;
    h.i = rrcduo[i1].i;

    comp1[i1].r = x.r * h.r - x.i * h.i;
    comp1[i1].i = x.r * h.i + x.i * h.r;

    /* Debug aid: print the same element after the multiplication.
       if ((z5==0) && (i1==500))
           printf("%lf %lf\n",comp1[500].r,comp1[500].i);
    */
}

/*
 * scale1: multiplies the first n complex elements of data by a real factor.
 *
 * Used after an inverse cuFFT transform with factor = 1/N to obtain the
 * normalized inverse. Launch layout: 1-D grid of 1-D blocks, one thread per
 * element; threads with index >= n do nothing.
 *
 * This kernel has to be declared before the host code that launches it.
 */
__global__ void scale1(complex1 *data, int n, double factor)
{
    int k = blockIdx.x * blockDim.x + threadIdx.x;

    if (k < n)
    {
        data[k].r *= factor;
        data[k].i *= factor;
    }
}

/*
 * block1: for every antenna nr and every sample i1, accumulates
 *
 *     sum over t of  comp1[i1 - delaytran[base+t]] * (tapreal[base+t] + j*tapimag[base+t])
 *
 * with base = g*ntaps + nr*nusers*ntaps, and stores the result in
 * timeuser[NRSAMPLES*nr + i1]. Terms whose source index i1 - delay would be
 * negative are skipped.
 *
 * Launch layout: 1-D grid of 1-D blocks, one thread per sample; threads whose
 * index is >= NRSAMPLES do nothing.
 *
 *   nant       number of antennas (outer loop count)
 *   ntaps      number of taps per (antenna, user) pair
 *   nusers     number of users (stride factor in the tap tables)
 *   g          user index selecting the tap set (the post states g = z5 here)
 *   tapreal    real parts of the taps
 *   tapimag    imaginary parts of the taps
 *   delaytran  per-tap delay in samples
 *   comp1      complex input samples, read only
 *   timeuser   complex output, nant consecutive rows of NRSAMPLES samples
 *
 * NOTE(review): the tap tables appear to be laid out as [antenna][user][tap];
 * confirm against the code that fills them. Delays are assumed non-negative:
 * a negative delay would read comp1 past index i1.
 */
__global__ void block1(int nant, int ntaps,int nusers,int g,double *tapreal,double *tapimag,int *delaytran,
complex1 *comp1,complex1 *timeuser)
{
    int i1 = blockIdx.x * blockDim.x + threadIdx.x;
    int nr, t1;

    if (i1 >= NRSAMPLES)
        return;   // grid tail: nothing to do

    /*
    g=z5 in this case
    */
    // Debug print of one input sample.
    if ((g==0) && (i1==500))
        printf("%lf %lf\n",(comp1+500)->r,(comp1+500)->i);
    //return;

    for (nr = 0; nr < nant; nr++)
    {
        complex1 acc;                                        // running sum for this antenna
        const int outIdx  = NRSAMPLES * nr + i1;             // output slot in row nr
        const int tapBase = g * ntaps + nr * nusers * ntaps; // first tap of (nr, g)

        acc.r = 0;
        acc.i = 0;

        for (t1 = 0; t1 < ntaps; t1++)
        {
            const int srcIdx = i1 - delaytran[tapBase + t1];

            // Samples before the start of comp1 contribute nothing.
            if (srcIdx >= 0)
            {
                complex1 tap, x;

                tap.r = tapreal[tapBase + t1];
                tap.i = tapimag[tapBase + t1];
                x.r   = comp1[srcIdx].r;
                x.i   = comp1[srcIdx].i;

                // Complex multiply-accumulate: acc += x * tap.
                acc.r += x.r * tap.r - x.i * tap.i;
                acc.i += x.r * tap.i + x.i * tap.r;
            }
        }

        timeuser[outIdx].r = acc.r;
        timeuser[outIdx].i = acc.i;
    } // antennas

    // Debug print of one output sample (row 0, written by this same thread).
    if ((g==0) && (i1==500))
        printf("%lf %lf\n",timeuser[500].r,timeuser[500].i);
}

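Solved. cuFFT's inverse FFT does not divide the result by the length of the transform, as is usual elsewhere (for example in MATLAB's ifft); cuFFT transforms are unnormalized. In this case the output of the inverse transform must be multiplied by 1/NRSAMPLES.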