FFTW output Vs CUDAFFT output Different outputs

cuda_beginner · April 10, 2008, 7:28pm

Hi,
I am new to CUDA and stuck in a really wierd problem. I have this FFT program implemented in FORTRAN.
Its a 2 * 2 * 2 FFT in 3d. N = 8

CASE 1: SINGLE PRECISION FFTW CALL
accuracy.f

 program test
  implicit none
  integer N
  parameter(N=8)
  integer plan
  complex in
  dimension in(N)
  integer i
  print *,'Input : '
  do i = 1,N
    in(i) = cmplx(float(i),float(i+1))
    print *,in(i)
  enddo

  call sfftw_plan_dft_3d ( plan, 2, 2, 2, in, in, FFTW_FORWARD, FFTW_ESTIMATE)
  call sfftw_execute ( plan )
  call sfftw_destroy_plan ( plan )
  print *,'Output : '
  do i = 1,N
    print *,in(i)
  enddo

  end

The output is:

Input :
(1.000000,2.000000)
(2.000000,3.000000)
(3.000000,4.000000)
(4.000000,5.000000)
(5.000000,6.000000)
(6.000000,7.000000)
(7.000000,8.000000)
(8.000000,9.000000)
Output :
(0.0000000E+00,0.0000000E+00)
(0.0000000E+00,0.0000000E+00)
(0.0000000E+00,0.0000000E+00)
(0.0000000E+00,0.0000000E+00)
(0.0000000E+00,0.0000000E+00)
(0.0000000E+00,0.0000000E+00)
(0.0000000E+00,0.0000000E+00)
(0.0000000E+00,0.0000000E+00)

CASE 2: CUDA FFT
accuracycuda.f

  program test
  implicit none
  integer N
  parameter(N=8)
  integer*8 plan
  complex in
  dimension in(N)
  integer i
  print *,'Input : '
  do i = 1,N
    in(i) = cmplx(float(i),float(i+1))
    print *,in(i)
  enddo
  call myfft(24,24,24,in, 1)
  print *,'Output :'
  do i = 1,N
    print *,in(i)
  enddo

  end

myfft.cu

#include <cufft.h>
#include <cutil.h>
#include <cuComplex.h>
#include “cuda.h”

extern “C” void myfft_(int *n1, int *n2, int *n3,cufftComplex *data, int *direction)
{
int Nx = *n1;
int Ny = *n2;
int Nz = *n3;
int dir = *direction;
cufftComplex *d_data;

    CUT_DEVICE_INIT();
    CUDA_SAFE_CALL(cudaMalloc((void**) &d_data, sizeof(cufftComplex)*Nx*Ny*Nz));

    CUDA_SAFE_CALL(cudaMemcpy(d_data, data, Nx*Ny*Nz*sizeof(cufftComplex), cudaMemcpyHostToDevice));

    cufftHandle plan1;
    CUDA_SAFE_CALL(cufftPlan3d(&plan1, Nz, Ny, Nx, CUFFT_C2C));

    if(dir<0)
            CUDA_SAFE_CALL(cufftExecC2C(plan1, (cufftComplex *)d_data, (cufftComplex *)d_data, CUFFT_FORWARD));
    else
            CUDA_SAFE_CALL(cufftExecC2C(plan1, (cufftComplex *)d_data, (cufftComplex *)d_data, CUFFT_INVERSE));

    CUDA_SAFE_CALL(cudaMemcpy(data, d_data, Nx*Ny*Nz*sizeof(cufftComplex), cudaMemcpyDeviceToHost));

    CUDA_SAFE_CALL(cufftDestroy(plan1));

    cudaFree(d_data);
    return;

}

The output is:

Input :
(1.000000,2.000000)
(2.000000,3.000000)
(3.000000,4.000000)
(4.000000,5.000000)
(5.000000,6.000000)
(6.000000,7.000000)
(7.000000,8.000000)
(8.000000,9.000000)
Output :
(36.00000,44.00000)
(-4.000000,-4.000000)
(-8.000000,-8.000000)
(0.0000000E+00,0.0000000E+00)
(-16.00000,-16.00000)
(0.0000000E+00,0.0000000E+00)
(0.0000000E+00,0.0000000E+00)
(0.0000000E+00,0.0000000E+00)

Can someone please advice why this difference in output ? I assume that CUDA FFT is based on FFTW model. So for same input, how can the output be different.

Any help would be appreciated.
Thanks

vpodlozhnyuk · April 11, 2008, 11:07am

The first output is definitely incorrect, because the input is non-zero, while the second one is correct at least in DC term as I far as I can see.

oem2008 · May 6, 2008, 8:49pm

Try this… I use fftw-3.1.2 and compile with -lfftw3f…don’t for get to include fftw3.f…

program test

implicit none

integer N

  parameter(N=8)

  integer plan

  complex in

  dimension in(N)

integer i

include ‘fftw3.f’

print *,'Input : ’

  do i = 1,N

     in(i) = cmplx(float(i),float(i+1))

  print *,in(i)

  enddo

call sfftw_plan_dft_3d ( plan, 2, 2, 2, in, in, FFTW_FORWARD,

 &     FFTW_ESTIMATE)

  call sfftw_execute ( plan )

  call sfftw_destroy_plan ( plan )

print *,'Output : ’

  do i = 1,N

     print *,in(i)

  enddo

end

This is the output…

Input :

( 1.000000 , 2.000000 )

( 2.000000 , 3.000000 )

( 3.000000 , 4.000000 )

( 4.000000 , 5.000000 )

( 5.000000 , 6.000000 )

( 6.000000 , 7.000000 )

( 7.000000 , 8.000000 )

( 8.000000 , 9.000000 )

Output :

( 36.00000 , 44.00000 )

( -4.000000 , -4.000000 )

( -8.000000 , -8.000000 )

( 0.000000 , 0.000000 )

( -16.00000 , -16.00000 )

( 0.000000 , 0.000000 )

Hi,

I am new to CUDA and stuck in a really wierd problem. I have this FFT program implemented in FORTRAN.

Its a 2 * 2 * 2 FFT in 3d. N = 8

CASE 1: SINGLE PRECISION FFTW CALL

accuracy.f

program test

implicit none

integer N

parameter(N=8)

integer plan

complex in

dimension in(N)

integer i

print *,'Input : ’

do i = 1,N

in(i) = cmplx(float(i),float(i+1))

print *,in(i)

enddo

call sfftw_plan_dft_3d ( plan, 2, 2, 2, in, in, FFTW_FORWARD, FFTW_ESTIMATE)

call sfftw_execute ( plan )

call sfftw_destroy_plan ( plan )

print *,'Output : ’

do i = 1,N

print *,in(i)

enddo

end

The output is:

Input :

(1.000000,2.000000)

(2.000000,3.000000)

(3.000000,4.000000)

(4.000000,5.000000)

(5.000000,6.000000)

(6.000000,7.000000)

(7.000000,8.000000)

(8.000000,9.000000)

Output :

(0.0000000E+00,0.0000000E+00)

(0.0000000E+00,0.0000000E+00)

(0.0000000E+00,0.0000000E+00)

(0.0000000E+00,0.0000000E+00)

(0.0000000E+00,0.0000000E+00)

(0.0000000E+00,0.0000000E+00)

(0.0000000E+00,0.0000000E+00)

(0.0000000E+00,0.0000000E+00)

CASE 2: CUDA FFT

accuracycuda.f

program test

implicit none

integer N

parameter(N=8)

integer*8 plan

complex in

dimension in(N)

integer i

print *,'Input : ’

do i = 1,N

in(i) = cmplx(float(i),float(i+1))

print *,in(i)

enddo

call myfft(24,24,24,in, 1)

print *,‘Output :’

do i = 1,N

print *,in(i)

enddo

end

myfft.cu

include <cufft.h>

include <cutil.h>

include <cuComplex.h>

include “cuda.h”

extern “C” void myfft_(int *n1, int *n2, int *n3,cufftComplex *data, int *direction)

{

int Nx = *n1;

int Ny = *n2;

int Nz = *n3;

int dir = *direction;

cufftComplex *d_data;

CUT_DEVICE_INIT();

CUDA_SAFE_CALL(cudaMalloc((void**) &d_data, sizeof(cufftComplex)NxNy*Nz));

CUDA_SAFE_CALL(cudaMemcpy(d_data, data, NxNyNz*sizeof(cufftComplex), cudaMemcpyHostToDevice));

cufftHandle plan1;

CUDA_SAFE_CALL(cufftPlan3d(&plan1, Nz, Ny, Nx, CUFFT_C2C));

if(dir<0)

CUDA_SAFE_CALL(cufftExecC2C(plan1, (cufftComplex *)d_data, (cufftComplex *)d_data, CUFFT_FORWARD));

else

CUDA_SAFE_CALL(cufftExecC2C(plan1, (cufftComplex *)d_data, (cufftComplex *)d_data, CUFFT_INVERSE));

CUDA_SAFE_CALL(cudaMemcpy(data, d_data, NxNyNz*sizeof(cufftComplex), cudaMemcpyDeviceToHost));

CUDA_SAFE_CALL(cufftDestroy(plan1));

cudaFree(d_data);

return;

}

The output is:

Input :

(1.000000,2.000000)

(2.000000,3.000000)

(3.000000,4.000000)

(4.000000,5.000000)

(5.000000,6.000000)

(6.000000,7.000000)

(7.000000,8.000000)

(8.000000,9.000000)

Output :

(36.00000,44.00000)

(-4.000000,-4.000000)

(-8.000000,-8.000000)

(0.0000000E+00,0.0000000E+00)

(-16.00000,-16.00000)

(0.0000000E+00,0.0000000E+00)

(0.0000000E+00,0.0000000E+00)

(0.0000000E+00,0.0000000E+00)

Can someone please advice why this difference in output ? I assume that CUDA FFT is based on FFTW model. So for same input, how can the output be different.

Any help would be appreciated.

Thanks

[snapback]360131[/snapback]

Topic		Replies	Views
CUFFT run wrong CUDA Programming and Performance	16	2804	May 23, 2013
cuFFT vs fftw3 vs Octave (fftw3?) cuFFT does not agree with fftw3 (R2C) CUDA Programming and Performance	2	1320	September 1, 2009
2D FFT- Real to Complex and Complex to Real CUDA Programming and Performance	0	2172	February 3, 2009
CUFFT issue CUDA Programming and Performance	0	1105	December 29, 2009
FFTW Vs CUFFT Performance CUDA Programming and Performance	1	11119	March 6, 2008
Questions about cuFFT for 3D matrix, arrayFire GPU-Accelerated Libraries	5	1644	October 12, 2021
CUFFT appears to give errors for vectors > 1024 CUDA Programming and Performance	6	8764	April 12, 2007
cuFFT and fftw CUDA Programming and Performance	10	4134	August 25, 2010
Fortran and cuFFT CUDA Programming and Performance	8	17823	September 19, 2009
accuracy of CUFFT under double precision CUDA Programming and Performance	9	4137	September 18, 2009

FFTW output Vs CUDAFFT output Different outputs

CASE 1: SINGLE PRECISION FFTW CALL accuracy.f

myfft.cu

Related topics

CASE 1: SINGLE PRECISION FFTW CALL
accuracy.f