Wrong results in CUFFT!

luistm · March 15, 2011, 3:57pm

Hi.

I’m trying to use CUFFT to compute the complex Fourier transform of some data, but the results are wrong.

Can anyone see a problem in the following code?

// Runs a 1D complex-to-complex FFT with cuFFT, in place on the device.
//   data_pointers[0] / data_pointers[1]: host arrays copied into the .x / .y
//     members of each float2 sample (the real and imaginary parts listed in
//     the post), and overwritten with the result afterwards.
//   args[0]: number of samples N.
//   args[1]: 1 requests the forward transform; for any other value no
//     transform is executed, because the inverse call is commented out.
// None of the CUDA / cuFFT return codes are checked.
void k_CUDA_FFT(float **data_pointers, int* args){

int N = args[0];

// allocate space on device

    float2* d_InputSignal;

    cudaMalloc((void**)&d_InputSignal, N*sizeof(float2));

// copy data from host to device

    // Interleave the two separate host arrays into one float2 staging buffer.
    float2* h_InputSignal = new float2[N];

    for(int i=0; i< N; i++){

        h_InputSignal[i].x=data_pointers[0][i];

        h_InputSignal[i].y=data_pointers[1][i];

    }

    cudaMemcpy(d_InputSignal, h_InputSignal,N*sizeof(float2), cudaMemcpyHostToDevice );

// CUFFT plan

    cufftHandle plan;

    // NOTE: this is the buffer size in bytes, not the number of points.
    int size = sizeof(float2) * N;

    // NOTE: `size` is passed as the transform length here. The replies in this
    // thread identify this as the cause of the wrong results; the length should
    // be the number of points, N.
    cufftPlan1d(&plan, size, CUFFT_C2C, 1);

/* executes FFT processes */

    if(args[1]==1)

        cufftExecC2C(plan, (cufftComplex *)d_InputSignal, (cufftComplex *)d_InputSignal, CUFFT_FORWARD);

    //else

    //    cufftExecC2C(plan, (cufftComplex *)d_InputSignal, (cufftComplex *)d_InputSignal, CUFFT_INVERSE);

// copy data from device to host

    cudaMemcpy(h_InputSignal,d_InputSignal,N*sizeof(float2),cudaMemcpyDeviceToHost);

    // De-interleave the result back into the caller's two arrays.
    for(int i=0; i< N; i++){

       data_pointers[0][i] = h_InputSignal[i].x;

       data_pointers[1][i] = h_InputSignal[i].y;

    }

cufftDestroy(plan);

    // NOTE(review): h_InputSignal was allocated with new[] above, so it is host
    // memory, not a cudaMalloc'd pointer. This should presumably be
    // delete[] h_InputSignal instead.
    cudaFree(h_InputSignal);

    cudaFree(d_InputSignal);

}

Input:

REAL:0.841471 0.909297 0.141120 -0.756802 -0.958924 -0.279415 0.656987 0.989358

IMAG:0.841471 0.909297 0.141120 -0.756802 -0.958924 -0.279415 0.656987 0.989358

The correct result should be :

REAL: 1.543091 0.799255 0.427607 0.136506 -0.181784 -0.686254 -2.258727 6.952074

IMAG: 0.332754 0.552790 0.662729 0.748840 0.842995 0.992223 1.457381 -1.267293

The wrong result is:

REAL: 1.543091 1.803324 1.575109 1.012566 0.405025 0.030154 0.028103 0.350289

IMAG: 0.332754 -0.238005 -0.809705 -1.081805 -0.935108 -0.462346 0.098292 0.488494

Thanks

luistm · March 15, 2011, 4:18pm

Hi again!

The problem is in “cufftPlan1d(&plan, size, CUFFT_C2C, 1);”.

Size should be the number of points of the FFT.

Thanks

Hi.

I’m trying to use CUFFT to compute the complex Fourier transform of some data, but the results are wrong.

Can anyone see a problem in the following code?
// Runs a 1D complex-to-complex FFT with cuFFT, in place on the device.
//   data_pointers[0] / data_pointers[1]: host arrays copied into the .x / .y
//     members of each float2 sample (the real and imaginary parts listed in
//     the post), and overwritten with the result afterwards.
//   args[0]: number of samples N.
//   args[1]: 1 requests the forward transform; for any other value no
//     transform is executed, because the inverse call is commented out.
// None of the CUDA / cuFFT return codes are checked.
void k_CUDA_FFT(float **data_pointers, int* args){

int N = args[0];

// allocate space on device

    float2* d_InputSignal;

    cudaMalloc((void**)&d_InputSignal, N*sizeof(float2));

// copy data from host to device

    // Interleave the two separate host arrays into one float2 staging buffer.
    float2* h_InputSignal = new float2[N];

    for(int i=0; i< N; i++){

        h_InputSignal[i].x=data_pointers[0][i];

        h_InputSignal[i].y=data_pointers[1][i];

    }

    cudaMemcpy(d_InputSignal, h_InputSignal,N*sizeof(float2), cudaMemcpyHostToDevice );

// CUFFT plan

    cufftHandle plan;

    // NOTE: this is the buffer size in bytes, not the number of points.
    int size = sizeof(float2) * N;

    // NOTE: `size` is passed as the transform length here. The replies in this
    // thread identify this as the cause of the wrong results; the length should
    // be the number of points, N.
    cufftPlan1d(&plan, size, CUFFT_C2C, 1);

/* executes FFT processes */

    if(args[1]==1)

        cufftExecC2C(plan, (cufftComplex *)d_InputSignal, (cufftComplex *)d_InputSignal, CUFFT_FORWARD);

    //else

    //    cufftExecC2C(plan, (cufftComplex *)d_InputSignal, (cufftComplex *)d_InputSignal, CUFFT_INVERSE);

// copy data from device to host

    cudaMemcpy(h_InputSignal,d_InputSignal,N*sizeof(float2),cudaMemcpyDeviceToHost);

    // De-interleave the result back into the caller's two arrays.
    for(int i=0; i< N; i++){

       data_pointers[0][i] = h_InputSignal[i].x;

       data_pointers[1][i] = h_InputSignal[i].y;

    }

cufftDestroy(plan);

    // NOTE(review): h_InputSignal was allocated with new[] above, so it is host
    // memory, not a cudaMalloc'd pointer. This should presumably be
    // delete[] h_InputSignal instead.
    cudaFree(h_InputSignal);

    cudaFree(d_InputSignal);

}
Input:

REAL:0.841471 0.909297 0.141120 -0.756802 -0.958924 -0.279415 0.656987 0.989358

IMAG:0.841471 0.909297 0.141120 -0.756802 -0.958924 -0.279415 0.656987 0.989358

The correct result should be :

REAL: 1.543091 0.799255 0.427607 0.136506 -0.181784 -0.686254 -2.258727 6.952074

IMAG: 0.332754 0.552790 0.662729 0.748840 0.842995 0.992223 1.457381 -1.267293

The wrong result is:

REAL: 1.543091 1.803324 1.575109 1.012566 0.405025 0.030154 0.028103 0.350289

IMAG: 0.332754 -0.238005 -0.809705 -1.081805 -0.935108 -0.462346 0.098292 0.488494

Thanks

mfatica · March 15, 2011, 4:22pm

The problem should be in your call:
cufftPlan1d(&plan, size, CUFFT_C2C, 1);

The size of the transform is in elements; you are passing the number of bytes.
Try:

cufftPlan1d(&plan, N, CUFFT_C2C, 1);

luistm · March 19, 2011, 4:10pm

Now I have a problem with this code:

// Snippet (function header not shown): in-place N x N complex-to-complex FFT
// with cuFFT. args[0] is N; args[1] == 1 selects the forward transform, any
// other value the inverse. CUDA / cuFFT return codes are not checked.
int N = args[0];

// allocate space on device

    float2* d_InputSignal;

    printf("Before cudaMalloc call\n");fflush(stdout);

    cudaMalloc((void**)&d_InputSignal, N*N*sizeof(float2));

// copy data from host to device

    // NOTE: this host buffer holds only N elements, but the loops and copies
    // below touch N*N of them and so run past its end. A later post in this
    // thread gives new float2[N*N] as the fix for the crash reported below.
    float2* h_InputSignal = new float2[N];

    for(int i=0; i< N*N; i++){

        h_InputSignal[i].x=data_pointers[0][i];

        h_InputSignal[i].y=data_pointers[1][i];

    }

    printf("Before cudaMemcpy call\n");fflush(stdout);

    cudaMemcpy(d_InputSignal, h_InputSignal,N*N*sizeof(float2), cudaMemcpyHostToDevice );

// CUFFT plan

    cufftHandle plan;

    int size = N;

    printf("Before cudaPlan call\n");fflush(stdout);

    // Both plan dimensions are N, given in elements.
    cufftPlan2d(&plan, size, size, CUFFT_C2C);

printf("cudaPlane return\n");fflush(stdout);

/* executes FFT processes */

    printf("Before cufft call\n");fflush(stdout);

    if(args[1]==1){

        cufftExecC2C(plan, (cufftComplex *)d_InputSignal, (cufftComplex *)d_InputSignal, CUFFT_FORWARD);

    }

    else{

        cufftExecC2C(plan, (cufftComplex *)d_InputSignal, (cufftComplex *)d_InputSignal, CUFFT_INVERSE);

}

    // copy data from device to host

    cudaMemcpy(h_InputSignal,d_InputSignal,N*N*sizeof(float2),cudaMemcpyDeviceToHost);

    // De-interleave the result back into the caller's two arrays.
    for(int i=0; i< N*N; i++){

       data_pointers[0][i] = h_InputSignal[i].x;

       data_pointers[1][i] = h_InputSignal[i].y;

    }

It gives an error in cufftPlan2d:

Before cudaPlan call

cudaFft: malloc.c:3096: sYSMALLOc: Assertion `(old_top == (((mbinptr) (((char *) &((av)->bins[((1) - 1) * 2])) - __builtin_offsetof (struct malloc_chunk, fd)))) && old_size == 0) || ((unsigned long) (old_size) >= (unsigned long)((((__builtin_offsetof (struct malloc_chunk, fd_nextsize))+((2 * (sizeof(size_t))) - 1)) & ~((2 * (sizeof(size_t))) - 1))) && ((old_top)->size & 0x1) && ((unsigned long)old_end & pagemask) == 0)' failed.

luistm · March 22, 2011, 2:12pm

Now I have a problem with this code:

// Snippet (function header not shown): in-place N x N complex-to-complex FFT
// with cuFFT. args[0] is N; args[1] == 1 selects the forward transform, any
// other value the inverse. CUDA / cuFFT return codes are not checked.
int N = args[0];

// allocate space on device

    float2* d_InputSignal;

    printf("Before cudaMalloc call\n");fflush(stdout);

    cudaMalloc((void**)&d_InputSignal, N*N*sizeof(float2));

// copy data from host to device

    // NOTE: this host buffer holds only N elements, but the loops and copies
    // below touch N*N of them and so run past its end. The end of this post
    // gives new float2[N*N] as the fix for the crash reported below.
    float2* h_InputSignal = new float2[N];

    for(int i=0; i< N*N; i++){

        h_InputSignal[i].x=data_pointers[0][i];

        h_InputSignal[i].y=data_pointers[1][i];

    }

    printf("Before cudaMemcpy call\n");fflush(stdout);

    cudaMemcpy(d_InputSignal, h_InputSignal,N*N*sizeof(float2), cudaMemcpyHostToDevice );

// CUFFT plan

    cufftHandle plan;

    int size = N;

    printf("Before cudaPlan call\n");fflush(stdout);

    // Both plan dimensions are N, given in elements.
    cufftPlan2d(&plan, size, size, CUFFT_C2C);

printf("cudaPlane return\n");fflush(stdout);

/* executes FFT processes */

    printf("Before cufft call\n");fflush(stdout);

    if(args[1]==1){

        cufftExecC2C(plan, (cufftComplex *)d_InputSignal, (cufftComplex *)d_InputSignal, CUFFT_FORWARD);

    }

    else{

        cufftExecC2C(plan, (cufftComplex *)d_InputSignal, (cufftComplex *)d_InputSignal, CUFFT_INVERSE);

}

    // copy data from device to host

    cudaMemcpy(h_InputSignal,d_InputSignal,N*N*sizeof(float2),cudaMemcpyDeviceToHost);

    // De-interleave the result back into the caller's two arrays.
    for(int i=0; i< N*N; i++){

       data_pointers[0][i] = h_InputSignal[i].x;

       data_pointers[1][i] = h_InputSignal[i].y;

    }

It gives an error in cufftPlan2d:

Before cudaPlan call

cudaFft: malloc.c:3096: sYSMALLOc: Assertion `(old_top == (((mbinptr) (((char *) &((av)->bins[((1) - 1) * 2])) - __builtin_offsetof (struct malloc_chunk, fd)))) && old_size == 0) || ((unsigned long) (old_size) >= (unsigned long)((((__builtin_offsetof (struct malloc_chunk, fd_nextsize))+((2 * (sizeof(size_t))) - 1)) & ~((2 * (sizeof(size_t))) - 1))) && ((old_top)->size & 0x1) && ((unsigned long)old_end & pagemask) == 0)' failed.

The problem was here:

float2* h_InputSignal = new float2[N];

It should be

float2* h_InputSignal = new float2[N*N];

Topic		Replies	Views
Question about the CUFFT sample code CUDA Programming and Performance	1	2462	January 29, 2009
CUFFT run wrong CUDA Programming and Performance	16	2979	May 23, 2013
cufft question CUDA Programming and Performance	6	8751	March 9, 2009
Cufft 1D transform CUDA Programming and Performance	6	3090	January 8, 2015
Wrong results in cufft! GPU-Accelerated Libraries	2	1268	September 30, 2015
2D CUFFT wrong result GPU-Accelerated Libraries cufft	8	3199	November 7, 2023
Problem with cufft test (computing forward and inverse fft) CUDA Programming and Performance	1	453	July 6, 2020
cufft API CUDA Programming and Performance	0	2190	October 31, 2008
Wrong results produced by Cufft GPU-Accelerated Libraries	8	1180	October 4, 2018
cuFFT cufftPlan1d and cufftExecR2C issues GPU-Accelerated Libraries	4	2439	July 13, 2016

Wrong results in CUFFT!

Related topics