cuFFT error!

I wrote the cufft sample code and tested it.
Test results using cos () seem to work well, but using sin () results in incorrect results.
Below is my code. And attachment is result.

#include <iostream>
#include <fstream>
#include <sstream>
#include <stdio.h>
#include <chrono>
#include "cufft.h"
#include "cuda_runtime.h"
#include <stdlib.h>  
#include <vector>

using namespace std;

/*
 * Create N fake samplings along the function cos(x). These samplings will be
 * stored as single-precision floating-point values.
 */
void generate_fake_samples(int N, float **out)
{
    int i;
    float *result = (float *)malloc(sizeof(float) * N);
    double delta = 2 * M_PI * 5;           // 5 = frequency
    double delta1 = 2 * M_PI * 10;          // 10 = frequency
    printf("delta = %f\n", delta);

    std::ofstream fout;
    fout.open("../signal_data.txt");
    for (i = 0; i < N; i++)
    {
        result[i] = sin(i * delta * 1/N); // + cos(i * delta1 * 1/N) + sin(i * delta1*2 * 1/N);
        fout << result[i];
        fout << "\n";
    }
    fout.close();

    *out = result;
}

/*
 * Convert a real-valued vector r of length Nto a complex-valued vector.
 */
void real_to_complex(float *r, cufftComplex **complx, int N)
{
    (*complx) = (cufftComplex *)malloc(sizeof(cufftComplex) * N);

    for (int i = 0; i < N; i++)
    {
        (*complx)[i].x = r[i];
        (*complx)[i].y = 0;
    }
}

int main(int argc, char *argv[])
{
    int N = 1024;                    // Input Array Size
    int BATCH = 1;
//    int resultSize = (N/2 + 1) * BATCH;       // FFT Result Size at 1-D FFT, Real-to-Complex
    int resultSize = N * BATCH;                 // FFT Result Size at 1-D FFT, Complex-to-Complex

    float * samples;

    cufftHandle plan;
    cufftComplex *inPtr;
    cufftComplex *complexSamples, *devPtr, *results;

    /* source data creation & Host memory allocation */
    generate_fake_samples(N, &samples);
    real_to_complex(samples, &complexSamples, N);
    results = (cufftComplex *) malloc(sizeof(cufftComplex) * resultSize);

    /* GPU memory allocation */
    cudaMalloc((void **)&inPtr, sizeof(cufftComplex) * N * BATCH);
    cudaMalloc((void **)&devPtr, sizeof(cufftComplex) * resultSize);

    /* transfer to GPU memory */
    cudaMemcpy(inPtr, complexSamples, sizeof(cufftComplex) * N * BATCH, cudaMemcpyHostToDevice);

    /* creates 1D FFT plan */
    cufftPlan1d(&plan, N, CUFFT_C2C, BATCH);

    /* executes FFT processes */
    cufftExecC2C(plan, inPtr, devPtr, CUFFT_FORWARD);

    /* executes FFT processes (inverse transformation) */
    // cufftExecC2R(plan, devPtr, inPtr); 

    /* transfer results from GPU memory */
    cudaMemcpy(results, devPtr, sizeof(cufftComplex) * resultSize, cudaMemcpyDeviceToHost);

    /* deletes CUFFT plan */
    cufftDestroy(plan);

    /* frees GPU memory */
    cudaFree(devPtr);

    printf("Fourier Coefficients:\n");

    float maximum = 0.0f;
    for (int i = 0; i < resultSize; i++)
    {
        if (fabs(results[i].x) > maximum)
        {
            maximum = fabs(results[i].x);
        }
    }

    std::ofstream fout;
    fout.open("../fft_data.txt");
    for (int i = 0; i < resultSize; i++)
    {
        printf("  %d: (%2.4f, %2.4f)\n", i + 1, results[i].x / maximum,
               results[i].y / maximum);
        float temp = results[i].x / maximum;
        fout << temp;
        fout << "\n";
    }
    fout.close();

    printf("  ...\n");

    free(samples);
    free(results);

    return 0;
}

cos.png
sin.png

The error is not in CUFFT but in your function that writes data to the output file, which is presumably the data you are using to generate your graphs.

Theory says that a FFT of a pure sine wave should produce a single spike in the imaginary plane, and FFT of a pure cosine wave should produce a single spike in the real plane. If we study the console printout of your program instead of the data written to the file, we see this borne out:

sine output:

$ ./t1000
delta = 31.415927
Fourier Coefficients:
  1: (0.0000, 0.0000)
  2: (0.2327, -0.1359)
  3: (0.0000, 0.0000)
  4: (0.4405, 0.2573)
  5: (0.0000, 0.0000)
  6: (0.0775, -57668960.0000)  //imaginary spike
  7: (0.0000, 0.0000)
  8: (0.0733, 0.2051)
  9: (0.0000, 0.0000)
  10: (-0.0759, -0.0754)
  11: (0.0000, 0.0000)
...
  1017: (0.0000, 0.0000)
  1018: (0.1378, -0.1404)
  1019: (0.0000, 0.0000)
  1020: (0.5789, 57668956.0000)  // output (complex) symmetry
  1021: (0.0000, 0.0000)
  1022: (0.3185, -0.2549)
  1023: (0.0000, 0.0000)
  1024: (0.2558, 0.1916)

cosine output:

$ ./t1000
delta = 31.415927
Fourier Coefficients:
  1: (-0.0000, 0.0000)
  2: (0.0000, 0.0000)
  3: (0.0000, 0.0000)
  4: (0.0000, -0.0000)
  5: (-0.0000, 0.0000)
  6: (1.0000, 0.0000)   // real spike
  7: (0.0000, 0.0000)
  8: (0.0000, -0.0000)
  9: (-0.0000, 0.0000)
  10: (0.0000, -0.0000)
  11: (0.0000, 0.0000)
...
  1017: (-0.0000, 0.0000)
  1018: (0.0000, 0.0000)
  1019: (0.0000, 0.0000)
  1020: (1.0000, -0.0000)  // output symmetry
  1021: (-0.0000, 0.0000)
  1022: (0.0000, 0.0000)
  1023: (0.0000, 0.0000)
  1024: (0.0000, -0.0000)
  ...

However here is the code that is writing the data to the output file:

fout.open("../fft_data.txt");
    for (int i = 0; i < resultSize; i++)
    {
        printf("  %d: (%2.4f, %2.4f)\n", i + 1, results[i].x / maximum,
               results[i].y / maximum);
        float temp = results[i].x / maximum;
        //           ^^^^^^^^^^^^
        //           ^^only real component!
        fout << temp;
        fout << "\n";
    }

It is only writing the real component! Likewise, your maximum-finding function (for output scaling) is only considering the real component:

float maximum = 0.0f;
    for (int i = 0; i < resultSize; i++)
    {
        if (fabs(results[i].x) > maximum)
        {
            maximum = fabs(results[i].x);
        }
    }

To display a proper scaled magnitude, you would need to do something like take the square root of the sum of the squares of the real and imaginary components, and then divide that by a normalizing coefficient which is computed from the peak magnitude, including both real and imaginary components.

As it is your output file misses the imaginary plane entirely. This is a defect in your code, not CUFFT.

txbob,
Thanks for your answer. Correct result after editing code.

float maximum = 0.0f;
    float * amplitude, * phase;
    for (int i = 0; i < resultSize; i++)
    {
        amplitude[i] = (float) sqrt(pow(results[i].x, 2) + pow(results[i].y, 2));
        if ((amplitude[i]) > maximum)
        {
            maximum = amplitude[i];
        }
    }