I’m trying to do a 2D image convolution with CUFFT, using the real-value functions, but it isn’t working. Just calling screenFFT and then retreiveIFFT (which should give me back my original image, with some scale factor) returns garbage that changes each time I call retrieveIFFT (it kinda resembles the input image on about the fourth or fifth call, though). Maybe someone could tell me where I’m going wrong here?
#include "/usr/local/cuda/include/cufft.h"
#include <math.h>
#include <stdio.h>
#define width 640
#define height 480
cufftReal *d_ipt;
cufftComplex *d_ipt_fft;
cufftHandle plan_R2C;
cufftHandle plan_C2R;
int mem_size_cplx = sizeof(cufftComplex) * height * width; //( width / 2 + 1 ); <---- this caused some "CUFFT_EXEC_FAILED" errors
int mem_size_re = sizeof(cufftReal) * width * height;
/* */
extern "C" {
int IMCON_init() {
cufftResult myResult;
cudaMalloc((void**)&d_ipt, mem_size_re);
cudaMalloc((void**)&d_ipt_fft, mem_size_cplx);
// Create CUFFTplan object .
myResult = cufftPlan2d(&plan_R2C, 480, 640, CUFFT_R2C);
if( myResult != CUFFT_SUCCESS ) { return 1; }
myResult = cufftPlan2d(&plan_C2R, 480, 640, CUFFT_C2R);
if( myResult != CUFFT_SUCCESS ) { return 2; }
else { return 0; }
}
/* */
void IMCON_quit() {
cufftDestroy( plan_C2R );
cufftDestroy( plan_R2C );
cudaFree( d_ipt );
cudaFree( d_ipt_fft );
}
/* */
int screenFFT( float* input ) {
cufftResult myResult1;
cudaMemcpy(d_ipt, (cufftReal*)input, mem_size_re, cudaMemcpyHostToDevice);
myResult1 = cufftExecR2C( plan_R2C, d_ipt, d_ipt_fft );
if( myResult1 != CUFFT_SUCCESS ) { return 1; }
else{ return 0; }
}
/* */
void retrieveIFFT( float* output ) {
// Test function that puts IFFT result back in the input buffer, and reads it back--should work but fails
cufftExecC2R( plan_C2R, d_ipt_fft, d_ipt );
cudaMemcpy((cufftReal*)output, d_ipt, mem_size_re, cudaMemcpyDeviceToHost);
}
/* */