Hello,

I am using cuFFTW. I generate a simple time series, and do the FFT with the fftw_plan_dft_1d() using FORWARD and then do an inverse FFT with fftw_plan_dft_1d() using BACKWARD. All my values match i.e I am able to recover the original time series except the first value. The first value of the time series was 0.000000 and the recovered value is -0.000000.

This is a bit absurd as the value is 0 in any precision. So I am not sure how the minus sign is appearing.

I am pasting my code here :

```
#include <stdio.h>
#include <cufftw.h>
#include <math.h>
#include <stdlib.h>
//#define PROBLEM_SIZE 100
void do_my_dft(fftw_complex *in, fftw_complex *out, int PROBLEM_SIZE);
void do_my_idft(fftw_complex *in, fftw_complex *out, int PROBLEM_SIZE);
int main(int argc, char **argv) {
char *endptr;
int PROBLEM_SIZE=0;
PROBLEM_SIZE = strtol(
(const char *)argv[1],
&endptr,
10
);
int i=0;
FILE *fp1, *fp2, *fp4;
fftw_complex *in, *out;
in = (fftw_complex *)fftw_malloc(sizeof(fftw_complex)*PROBLEM_SIZE);
out = (fftw_complex *)fftw_malloc(sizeof(fftw_complex)*PROBLEM_SIZE);
// Taking sine time series
printf("Taking a simple sine time series\n");
double signal_xi;
double f_0 = 20.0;
double t_i;
double delta_t = 1/f_0;
fp1 = fopen("time_series.csv","w+");
fprintf(fp1,"\"xval\",\"yval\"\n");
for(i=0;i<PROBLEM_SIZE;i++) {
t_i = i*delta_t;
signal_xi = sin(2*180*f_0*t_i);
in[i][0] = signal_xi;
in[i][1] = 0.0;
out[i][0] = 0.0;
out[i][1] = 0.0;
//printf("%d %f \n",i,signal_xi);
fprintf(fp1,"%d,%f\n",i,signal_xi);
}
fclose(fp1);
// do the FFT
do_my_dft(in, out, PROBLEM_SIZE);
fp2 = fopen("fft_of_time_series.csv","w+");
fprintf(fp2,"\"xval\",\"yval\"\n");
for(i=0;i<PROBLEM_SIZE;i++) {
//printf("%f %f\n",out[i][0],out[i][1]);
fprintf(fp2,"%f,%f\n",out[i][0],out[i][1]);
}
fclose(fp2);
memset(in,0,sizeof(fftw_complex)*PROBLEM_SIZE);
printf("reset in to 0\n");
for(i=0;i<PROBLEM_SIZE;i++) {
printf("%f %f\n",in[i][0],in[i][1]);
}
// Calculate IDFT
printf("IDFT or rather IFFT of the function or original signal : \n");
// calculate IDFT
do_my_idft(out, in, PROBLEM_SIZE);
fp4 = fopen("recovered_signal_with_ifft.csv","w+");
fprintf(fp4,"\"xval\",\"yval\"\n");
for(i=0;i<PROBLEM_SIZE;i++) {
//printf("%f %f\n",in[i][0],in[i][1]);
fprintf(fp4,"%d,%f\n",i,in[i][0]);
}
fclose(fp4);
free(in);
free(out);
exit (0);
}
void do_my_dft(fftw_complex *in, fftw_complex *out, int PROBLEM_SIZE) {
fftw_plan p;
p = fftw_plan_dft_1d(PROBLEM_SIZE, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
fftw_execute(p);
fftw_destroy_plan(p);
}
void do_my_idft(fftw_complex *in, fftw_complex *out, int PROBLEM_SIZE) {
fftw_plan p;
p = fftw_plan_dft_1d(PROBLEM_SIZE, in, out, FFTW_BACKWARD, FFTW_ESTIMATE);
fftw_execute(p);
fftw_destroy_plan(p);
int i=0;
for(i=0;i<PROBLEM_SIZE;i++) {
out[i][0]=out[i][0]/PROBLEM_SIZE;
}
}
```

I compile with the following options :

```
$ nvcc simple_fft_nvcc_function.cu -lcufft -lcufftw -o simple_fft_nvcc_function -arch=sm_35
```

Where simple_fft_nvcc_function.cu is the file with the code pasted above.

I run the code as follows :

```
$ ./simple_fft_nvcc_function 200
```

The scenario occurs for all the problem sizes which I test for e.g 500, 1000, 2000, 8000 etc.

I am using a Tesla K20c.

```
$ nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2015 NVIDIA Corporation
Built on Mon_Feb_16_22:59:02_CST_2015
Cuda compilation tools, release 7.0, V7.0.27
```

My cuFFT/cuFFTW is the one included with my CUDA package and its headers and libraries are in my PATH.

```
$ uname -ar
Linux foomachine 3.16.0-77-generic #99~14.04.1-Ubuntu SMP Tue Jun 28 19:17:10 UTC 2016 x86_64 x86_64 x86_64 GNU/Linux
```

What exactly am I doing wrong?

If more information is needed, please do ask.

Thanks and Regards,

- vihan