Simple CUFFT CUDA code not working...

Hello everybody,

I wrote the following CUDA code to test CUFFT.
I’m writing it on a PC without a CUDA-enabled GPU, so I’m debugging it in EMULATION MODE.

#include “cuda.h”
#include “cufft.h”
#include “cuFloatComplex.h”

// Test program from the question: builds a 16-point complex signal of ones on
// the host, uploads it, runs a forward complex-to-complex FFT, and copies a
// device buffer back to the host.
// NOTE(review): none of the cudaMalloc/cudaMemset/cudaMemcpy return values are
// checked, and `result` is assigned but never inspected.
int main () {

     // Number of complex points in the transform.
     int M=16;
 cufftHandle plan1D;
 // idata/odata are allocated with cudaMalloc below; d/t are allocated with malloc.
 cufftComplex *idata,*odata,*d,*t;
 cudaMalloc((void**)&idata, M*sizeof(cufftComplex));
 cudaMalloc((void**)&odata, M*sizeof(cufftComplex));
 // Zero-fill odata; this is the only write to odata in this program.
 cudaMemset(odata,0,M*sizeof(cufftComplex));
 
    d=(cufftComplex*) malloc(M*sizeof(cufftComplex));
 t=(cufftComplex*) malloc(M*sizeof(cufftComplex));
 // Every input sample is set to 1 + 0i.
 for (int pp=0;pp<M;pp++) { d[pp].x=1;d[pp].y=0; }
 // Upload the host input d into idata.
 // NOTE(review): `cuffComplex` is spelled differently from the `cufftComplex`
 // used everywhere else in this snippet.
 cudaMemcpy (idata, d, M*sizeof(cuffComplex), cudaMemcpyHostToDevice);
 
    cufftResult result; 
 // Create a 1D complex-to-complex plan of length M (last argument is 1).
 result = cufftPlan1d(&plan1D, M, CUFFT_C2C,1); 
 // Both the input and the output argument are idata, so the transform is
 // written back into idata; odata is not passed to this call.
 result= cufftExecC2C(plan1D, idata,idata, CUFFT_FORWARD);
 // Copies odata (not idata) into t; odata was last written by the cudaMemset above.
 cudaMemcpy (t, odata, M*sizeof(cuffComplex), cudaMemcpyDeviceToHost);

}

The code compiles and runs without signaling any error at runtime. But the call to cufftExecC2C does not appear to do anything and leaves the output unchanged, even though the return value (the result variable) is CUFFT_SUCCESS.

I’m using the correct libraries, as I link to cufftemu.lib (linking to cufft.lib also compiles, but then the result variable is CUFFT_INTERNAL_ERROR), so I cannot figure out what’s wrong with the code… Maybe CUFFT cannot be used in emulation mode?

Please help me.

Thank you,

Pietro

You are using an in-place transform:

result= cufftExecC2C(plan1D, idata,idata, CUFFT_FORWARD);

That is, idata —> idata: the Fourier coefficients overwrite the input buffer,

but you then copy “odata” (which the transform never wrote to) into “t”.

You should use an out-of-place transform instead. You also have a typo: “cuffComplex” should be “cufftComplex”.

The following code corrects these errors and works on my machine (not in emulation mode):

[code]

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#include <cuda_runtime_api.h>
#include <cufft.h>
#include <cutil_inline.h>

/* Abort with a diagnostic if a CUFFT call did not return CUFFT_SUCCESS. */
static void checkCufft(cufftResult status, const char *what)
{
    if (status != CUFFT_SUCCESS) {
        fprintf(stderr, "%s failed with cufftResult %d\n", what, (int)status);
        exit(EXIT_FAILURE);
    }
}

/*
 * Minimal out-of-place CUFFT example.
 *
 * Builds an M-point complex signal of ones on the host, uploads it to the
 * device, runs a forward complex-to-complex FFT from idata into odata, copies
 * odata back to the host, and prints both the input (d) and the output (t).
 *
 * For an all-ones input the forward DFT is M in bin 0 and 0 in every other
 * bin, so with M = 4 the expected output is t[0] = (4, 0) and the rest zero.
 *
 * Returns EXIT_SUCCESS on success; any failing CUDA/CUFFT call or host
 * allocation terminates the process with a diagnostic.
 */
int main (int argc, char *argv[])
{
    const int M = 4;                                 /* transform length */
    const size_t bytes = M * sizeof(cufftComplex);   /* size of each buffer */
    int i;

    cufftHandle plan1D;
    cufftComplex *idata, *odata;   /* device input / output */
    cufftComplex *d, *t;           /* host input / output */

    (void)argc;
    (void)argv;

    /*
     * cutilSafeCall is used for every runtime call so all of them are checked
     * the same way.
     * NOTE(review): CUDA_SAFE_CALL from the SDK's cutil.h appears to check
     * only in _DEBUG builds -- confirm against your SDK version.
     */
    cutilSafeCall( cudaMalloc((void**)&idata, bytes) );
    cutilSafeCall( cudaMalloc((void**)&odata, bytes) );
    /* Zero the output so a transform that silently did nothing is visible. */
    cutilSafeCall( cudaMemset(odata, 0, bytes) );

    d = (cufftComplex*) malloc(bytes);
    t = (cufftComplex*) malloc(bytes);
    /* Explicit check instead of assert(): assert vanishes under NDEBUG. */
    if (d == NULL || t == NULL) {
        fprintf(stderr, "host allocation of %lu bytes failed\n",
                (unsigned long)bytes);
        exit(EXIT_FAILURE);
    }

    /* Input signal: every sample is 1 + 0i (float literals, no double promotion). */
    for (i = 0; i < M; i++) {
        d[i].x = 1.0f;
        d[i].y = 0.0f;
    }

    cutilSafeCall( cudaMemcpy(idata, d, bytes, cudaMemcpyHostToDevice) );

    /* One 1D complex-to-complex transform of length M (batch = 1). */
    checkCufft( cufftPlan1d(&plan1D, M, CUFFT_C2C, 1), "cufftPlan1d" );

    /*
     * Out-of-place: read from idata, write to odata. Passing idata for both
     * arguments would make the transform in-place and leave odata untouched.
     */
    checkCufft( cufftExecC2C(plan1D, idata, odata, CUFFT_FORWARD), "cufftExecC2C" );

    /* Blocking copy: also waits for the transform to finish before reading odata. */
    cutilSafeCall( cudaMemcpy(t, odata, bytes, cudaMemcpyDeviceToHost) );

    for (i = 0; i < M; i++) {
        printf("d[%d] = (%0.3f,%0.3f)\n", i, d[i].x, d[i].y);
    }
    printf("\n") ;
    for (i = 0; i < M; i++) {
        printf("t[%d] = (%0.3f,%0.3f)\n", i, t[i].x, t[i].y);
    }

    /* Release the plan and every buffer allocated above. */
    checkCufft( cufftDestroy(plan1D), "cufftDestroy" );
    cutilSafeCall( cudaFree(idata) );
    cutilSafeCall( cudaFree(odata) );
    free(d);
    free(t);

    return EXIT_SUCCESS;
}

[/code]