Cufft_R2C and Cufft_C2R are inaccurate

I described my problem here: Instability of CUFFT_R2C and CUFFT_C2R | Medical Imaging Solution

My testing codes for ifft (C2R) are attached.

using namespace std;

#include <stdio.h>
#include
#include <math.h>
#include <stdlib.h>
#include <string.h>

#include <cufft.h>

#define INFILE “x.DAT”
#define OUTFILE1 “X.DAT”
#define OUTFILE2 “xx.DAT”

#define NO_x1 (1024)
#define NO_x2 (1024)
#define NO_X1 (1024)// Run faster
#define NO_X2 (1024)

int main(int argc, char**argv){
int device,nx1,nx2;
FILE *fp;
if (cudaSetDevice(0)!=cudaSuccess){
cout<<“Error when initializing device2!”<<endl;
exit(-1);
}
cudaGetDevice(&device);
cout<<“I am occupying device-”<<device<<"; "<<endl;

float x = (cufftReal)malloc(NO_x1NO_x2sizeof(float));
cufftReal d_x;
cudaMalloc((void
*)&d_x,sizeof(cufftReal)NO_x1NO_x2);
cufftComplex d_X;
cudaMalloc((void
*)&d_X,sizeof(cufftComplex)NO_X1NO_X2);
cufftComplex X = (cufftComplex )malloc(NO_X1NO_X2sizeof(cufftComplex));
cufftHandle plan_cr;
if (cufftPlan2d(&plan_cr, NO_x1, NO_x2, CUFFT_C2R) != CUFFT_SUCCESS){
cout<<“CUFFT error: Plan creation failed???”<<endl;
return (1);
}
if((fp=fopen(“X.DAT”,“rb”))==NULL){
printf(“Can not open file %s.\n”,INFILE);
exit(1);
}
if (fread(X,sizeof(cufftComplex),NO_X1NO_X2,fp)!=NO_X1NO_X2){
printf(“Can not read file &s.\n”,INFILE);
exit(1);
}
fclose(fp);

cudaMemset(d_X,0,sizeof(cufftComplex)NO_X1NO_X2); // This line is for testing only
cudaMemcpy(d_X,X,sizeof(cufftComplex)513512,cudaMemcpyHostToDevice);

cudaMemset(d_x,0,sizeof(float)NO_x1NO_x2);
if (cufftExecC2R(plan_cr, d_X, d_x) != CUFFT_SUCCESS){
cout<<“CUFFT error: ExecR2C forward failed”<<endl;
return;
}

cudaMemcpy(x,d_x,sizeof(float)NO_x1NO_x2,cudaMemcpyDeviceToHost);

if ((fp=fopen(“xestimate.DAT”,“wb”))==NULL){
cout<<"Can not open file "<<OUTFILE1<<endl;
exit(0);
}
fwrite(x,sizeof(float),NO_x1*NO_x2,fp);
fclose(fp);
free(x);
free(X);
cudaFree(d_X);cudaFree(d_x);
cufftDestroy(plan_cr);
cudaThreadExit();
return(0);
}

Hi,

here is a small code I made (based on yours, so if some stuffs look similar it’s normal ;) ). It’s a R2C transform, followed by a C2R one. Obviously, the final output is the same as the initial input. So you can see how to use that properly, because I think there’re few things that are not really clear. Or maybe I just didn’t understand the purpose of your code, I must admit that I’m not on arrow these days (in that case just ignore my message please ^^)

using namespace std;

#include <iostream>
#include <cufft.h>
#include <stdio.h>

#define INFILE   "x.in"
#define OUTFILE1 "complex_x.out"
#define OUTFILE2 "x.out"
#define N1 1024
#define N2 1024

int main(int argc, char **argv){

int real_size   = N1*N2;
int complex_size= N1*(N2/2+1); //Output memory is N1(N/2+1), not (N1/2)(N2/2+1)

cufftReal    *real_x,*d_real_x;
cufftComplex *complex_x,*d_complex_x;

cufftHandle plan_r2c,plan_c2r;

FILE *fp;

//Creation of the plans
if(cufftPlan2d(&plan_r2c,N1,N2,CUFFT_R2C) != CUFFT_SUCCESS){
        cout<<"CUFFT ERROR : R2C plan creation failed"<<endl;
        exit(-1);
}

if(cufftPlan2d(&plan_c2r,N1,N2,CUFFT_C2R) != CUFFT_SUCCESS){
        cout<<"CUFFT ERROR : C2R plan creation failed"<<endl;
        exit(-1);
}

//Memory allocation
real_x = (cufftReal*)malloc(real_size*sizeof(cufftReal));
cudaMalloc((void**)&d_real_x,real_size*sizeof(cufftReal));

complex_x = (cufftComplex*)malloc(complex_size*sizeof(cufftComplex));
cudaMalloc((void**)&d_complex_x,complex_size*sizeof(cufftComplex));

//Data intialisation
for(int ii=0; ii<real_size; ii++) real_x[ii]=(double)ii/2.0; //Not the courage to put random stuff :p

if((fp=fopen(INFILE,"w"))==NULL){
        cout<<"Can't open file "<<INFILE<<endl;
        exit(-1);
}

for (int ii=0;ii<real_size;ii++) fprintf(fp,"%.1f\n",real_x[ii]); //Just in order to have a file for comparison

fclose(fp);

//FFT real to complex*************************************************
cudaMemcpy(d_real_x,real_x,real_size*sizeof(cufftReal),cudaMemcpyHostToDevice);

if(cufftExecR2C(plan_r2c,d_real_x,d_complex_x) != CUFFT_SUCCESS){
        cout<<"CUFFT error : ExecR2C failed"<<endl;
        exit(-1);
}

cudaMemcpy(complex_x,d_complex_x,complex_size*sizeof(cufftComplex),cudaMemcpyDeviceToHost);
//********************************************************************

if((fp=fopen(OUTFILE1,"w"))==NULL){
        cout<<"Can't open file "<<OUTFILE1<<endl;
        exit(-1);
}

for (int ii=0;ii<complex_size;ii++) fprintf(fp,"%f %f\n",complex_x[ii].x,complex_x[ii].y);

fclose(fp);

//FFT complex to real+++++++++++++++++++++++++++++++++++++++++++++++++
cudaMemcpy(d_complex_x,complex_x,complex_size*sizeof(cufftComplex),cudaMemcpyHostToDevice);

if(cufftExecC2R(plan_c2r,d_complex_x,d_real_x) != CUFFT_SUCCESS){
        cout<<"CUFFT error : ExecC2R failed"<<endl;
        exit(-1);
}

cudaMemcpy(real_x,d_real_x,real_size*sizeof(cufftReal),cudaMemcpyDeviceToHost);
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

if((fp=fopen(OUTFILE2,"w"))==NULL){
        cout<<"Can't open file "<<OUTFILE2<<endl;
        exit(-1);
}

for (int ii=0;ii<real_size;ii++) fprintf(fp,"%.1f\n",real_x[ii]/real_size); //cuFFT is unnormalized, you have to divise by N1*N2 to get the inital values

fclose(fp);


free(real_x);
free(complex_x);
cudaFree(d_real_x);
cudaFree(d_complex_x);

cufftDestroy(plan_r2c);
cufftDestroy(plan_c2r);

return(0);

}

PS : I’m french, sorry for my english ._.

In my code I have to define the real matrix of size [Nx][Ny][Nz+2], the last 2 elements coresponding to Nz and Nz+1 are just padding so that one can do a inplace transform.
The complex array is of size [Nx][Ny][Nz/2+1].
You have to use this or set the compatibility mode in a such way that the padding is disabled. The same applies for 2D arrays.

I posted this message without checking if there was a reply meanwhile. :)