I’m trying to get a 2D FFT out of CUFFT, but it doesn’t seem to be working. The code I’m working with is below. If I comment out the two cufftExecute() lines, then the image will come back as it went in. If they run, however, then I get back a screen of noise with what looks vaguely like the original image smeared horizontally the whole way across. The original image (the input to cufftExecute()) also gets corrupted, and the transformed data looks like noise.

Note that the image is 1024 x 650, monochrome, and stored in an array of floats. Pixels are horizontally-sequential (incrementing the index moves right a pixel).

Can anyone tell me what I’m doing wrong here?

```
#include "/usr/local/cuda/include/cufft.h"
#include <math.h>
cufftComplex *d_ipt1;
cufftComplex *d_ipt1_fft;
cufftComplex *d_ipt2;
cufftComplex *d_ipt2_fft;
cufftComplex *d_output;
cufftComplex *d_output_fft;
cufftHandle plan;
int mem_size = sizeof(cufftComplex) * 665600;
/* ========================== */
extern "C" {
int IMCON_init() {
cufftResult myResult;
// Allocate device memory for input / output
cudaMalloc((void**)&d_ipt1, mem_size);
cudaMalloc((void**)&d_ipt2, mem_size);
cudaMalloc((void**)&d_ipt1_fft, mem_size);
cudaMalloc((void**)&d_ipt2_fft, mem_size);
cudaMalloc((void**)&d_output, mem_size);
cudaMalloc((void**)&d_output_fft, mem_size);
// Create CUFFTplan object .
myResult = cufftPlan2d(&plan, 1024, 650, CUFFT_DATA_C2C);
if( myResult == CUFFT_SUCCESS ) { return 0; }
else { return 1; }
}
/* ===================== */
void IMCON_quit() {
cufftDestroy( plan );
cudaFree( d_ipt1 );
cudaFree( d_ipt2 );
cudaFree( d_ipt1_fft );
cudaFree( d_ipt2_fft );
cudaFree( d_output );
cudaFree( d_output_fft );
}
/* ======================== */
int screenFFT( float* input ) {
cufftResult myResult1;
cufftResult myResult2;
cufftComplex h_ipt1[ 665600 ];
for( int i = 0; i < 665600; i++ ) {
h_ipt1[ i ][ 0 ] = input[ i ];
h_ipt1[ i ][ 1 ] = 0;
}
// Copy host memory to device
cudaMemcpy(d_ipt1, h_ipt1, mem_size, cudaMemcpyHostToDevice);
myResult1 = cufftExecute(plan, d_ipt1, d_ipt1_fft, CUFFT_FORWARD);
myResult2 = cufftExecute(plan, d_ipt1_fft, d_ipt1, CUFFT_INVERSE);
if( myResult1 != CUFFT_SUCCESS or myResult2 != CUFFT_SUCCESS ) { return 1; }
else{ return 0; }
}
/* ============================== */
void retrieveFFTmags( float* output ) {
cufftComplex h_opt1[ 665600 ];
cudaMemcpy(h_opt1, d_ipt1, mem_size, cudaMemcpyDeviceToHost);
for( int i = 0; i < 665600; i++ ) {
output[ i ] = h_opt1[ i ][ 0 ];
}
}
}
```