Dear all:
I try to do 1D FFT R2C (real to complex) in-place (batch mode), but
result is wrong if batch > 1.
The following code do 1D R2C with batch = 2
[codebox]#include <stdio.h>
#include <assert.h>
#include <cufft.h>
#include <cutil_inline.h>
#ifdef DO_DOUBLE
typedef double doublereal ;
typedef cufftDoubleComplex Complex;
typedef float doublereal ;
typedef cufftComplex Complex;
int main( int argc, char* argv )
{
int i, j ;
int batch = 2 ;
int n = 5 ;
doublereal *u = (doublereal *)malloc( sizeof(doublereal)*batch*n ) ;
assert( u ) ;
for(i = 0 ; i < batch*n ; i++){ u[i] = 0.0 ; }
for(j = 0 ; j < n ; j++){ u[j] = 1.0 ; }
doublereal *ptr = u ;
for(i = 1 ; i <= batch ; i++){
for( j = 0; j < n ; j++){
printf("u(%d,%d) = %13.7E\n", i, j, *ptr );
ptr++ ;
}
}
cufftHandle plan ;
#if defined (DO_DOUBLE)
cufftPlan1d(&plan, n, CUFFT_D2Z, batch );
cufftPlan1d(&plan, n, CUFFT_R2C, batch );
Complex *d_u ;
cutilSafeCall( cudaMalloc((void**)&d_u, sizeof(Complex)*batch*((n>>1)+1) ) );
CUDA_SAFE_CALL(cudaMemcpy( d_u, u, sizeof(doublereal)*batch*n , cudaMemcpyHostToDevice) );
#if defined (DO_DOUBLE)
cufftExecD2Z( plan, (cufftDoubleReal *)d_u, d_u );
cufftExecR2C( plan, (cufftReal *)d_u, d_u );
Complex *u_hat = (Complex *)malloc(sizeof(Complex)*batch*((n>>1)+1) );
CUDA_SAFE_CALL(cudaMemcpy( u_hat, d_u, sizeof(Complex)*batch*((n>>1)+1), cudaMemcpyDeviceToHost) );
Complex *qptr = u_hat ;
for(i = 1 ; i <= batch ; i++){
for( j = 0; j <= (n>>1) ; j++){
printf("cufft(u)(%d,%d) = (%13.7E, %13.7E)\n", i, j, qptr->x, qptr->y );
qptr++ ;
}
}
}
[/codebox]
the output is
u(1,0) = 1.0000000E+000
u(1,1) = 1.0000000E+000
u(1,2) = 1.0000000E+000
u(1,3) = 1.0000000E+000
u(1,4) = 1.0000000E+000
u(2,0) = 0.0000000E+000
u(2,1) = 0.0000000E+000
u(2,2) = 0.0000000E+000
u(2,3) = 0.0000000E+000
u(2,4) = 0.0000000E+000
cufft(u)(1,0) = (5.0000000E+000, 0.0000000E+000)
cufft(u)(1,1) = (0.0000000E+000, 0.0000000E+000)
cufft(u)(1,2) = (0.0000000E+000, 0.0000000E+000)
cufft(u)(2,0) = (8.0901700E-001, 0.0000000E+000)
cufft(u)(2,1) = (2.4999999E-001, 7.6942128E-001)
cufft(u)(2,2) = (-6.5450847E-001, 4.7552806E-001)
-
first sequence u(1,0:4) has correct result
cufft(u(1,0:4)) = [5 , 0, 0 ]
-
second sequence u(2,0:4) is a zero vector,
but its Fourier coefficent is not zero.
However if I use out-of-place, then it is O.K.
I think that this problem is related to my previous post
http://forums.nvidia.com/index.php?showtop…hl=in-place+FFT
since 2D FFT can be obtained by do two 1D FFT.
Does anyone have successful experience on “in-place R2C, batch mode” ?
ps: my platform is winxp pro64, vc2005, cuda 2.3, driver 190.38, GTX295