Hello !
I am a beginner on cuda and I expect to realize some fast fourier transform whith cufft.h. For that, I made the following code in order to get an image, make the fft transform, reverse it and display the raw_image and the IFFT(FFT(raw_image)) to verify if the images are the same.
And… it for now doesn’t work. ^^
I guess this is the way I stored the data input to make the fft through the GPU but I do not see where is the bug. :/
The datas I collected come from a CImg. For this reason the image data are linearly dispatch along widthheightdepth*spectrum.
The link to the definition : http://cimg.eu/reference/group__cimg__storage.html
The code :
#include <iostream>
#include <stdio.h>
#include "CImg.h"
#include "cuda_runtime.h"
#include "cufft.h"
#include <cassert>
//g++ cufft_ok.cpp -I/usr/local/cuda-9.0/include -L/usr/local/cuda-9.0/lib64 -o cufft_ok -lX11 -lpthread -lcudart -lcufft
using namespace cimg_library;
int main(){
CImg<float>* p_image = new CImg<float>("image.bmp"); //the base image
CImg<float> &image = (*p_image);
float* data = image.data();
int size = image.size();
int width = image.width();
int height = image.height();
int depth = image.depth();
int spectrum = image.spectrum();
CImg<float> test_img(width,height,depth,1);
memcpy(test_img._data,data,width*height*depth*sizeof(float));
size_t work_size;
int fft_sz = test_img.size(); // Size of each FFT
int fft_nx = width;
int fft_ny = height;
int num_ffts = 1; // How many FFTs to do
cufftComplex *out_buf_d;
cufftReal *in_buf_h, *in_buf_d;
// Allocate buffers on host and device
in_buf_h = new float;
cudaMalloc(&in_buf_d, fft_sz*num_ffts*sizeof(float));
if (cudaGetLastError() != cudaSuccess){
fprintf(stderr, "Cuda error: Failed to allocate\n");
return -1;
}
cudaMalloc(&out_buf_d, fft_sz*num_ffts*sizeof(cufftComplex));
if (cudaGetLastError() != cudaSuccess){
fprintf(stderr, "Cuda error: Failed to allocate\n");
return -1;
}
// Fill input buffer with zeros and copy to device
if(cudaMemset(out_buf_d, 0, fft_sz*num_ffts*sizeof(cufftComplex)) != cudaSuccess){
fprintf(stderr,"Cuda error : Failed to set memory");
return -1;
}
// Copy image.data() into in_buf_h
memcpy(in_buf_h,image._data,sizeof(float)*fft_sz*num_ffts);
// Copy the data from host to device
if(cudaMemcpy(in_buf_d, in_buf_h, fft_sz*num_ffts*sizeof(float), cudaMemcpyHostToDevice) != cudaSuccess){
fprintf(stderr,"Cuda error : Failed to copy memory");
return -1;
}
// Plan num_ffts of size fft_sz
cufftHandle plan;
cufftCreate(&plan);
if(cufftMakePlan2d(plan, fft_nx, fft_ny, CUFFT_R2C, &work_size)!= CUFFT_SUCCESS){
fprintf(stderr,"Cufft error : Failed to make plan");
return -1;
}
//execute the plan
if(cufftExecR2C(plan, in_buf_d, out_buf_d) != CUFFT_SUCCESS){
fprintf(stderr,"Cufft error : Failed to execute the cufft");
return -1;
}
// Sync the device to flush the output
if(cudaDeviceSynchronize() != cudaSuccess){
fprintf(stderr,"Cuda error : Failed to synchronize memory");
return -1;
}
//Plan for ifft
if(cufftMakePlan2d(plan, fft_nx, fft_ny, CUFFT_C2R, &work_size) != CUFFT_SUCCESS){
fprintf(stderr,"Cufft error : Failed to make the plan");
return -1;
}
//execute the plan
if(cufftExecC2R(plan, out_buf_d, in_buf_d) != CUFFT_SUCCESS){
fprintf(stderr,"Cufft error : Failed to execute the cufft");
return -1;
}
// Sync the device to flush the output
if(cudaDeviceSynchronize() != cudaSuccess){
fprintf(stderr,"Cuda error : Failed to synchronize memory");
return -1;
}
// Bring back the final data on host
if(cudaMemcpy(in_buf_h,in_buf_d, fft_sz*num_ffts*sizeof(float), cudaMemcpyDeviceToHost) != cudaSuccess){
fprintf(stderr,"Cuda error : Failed to copy memory");
return -1;
}
//store of ifft(fft(image))
CImg<float> final(width,height,depth,1);
memcpy(final._data,in_buf_h,fft_sz*num_ffts*sizeof(float));
//Display images to compare visualy
(test_img,final).display("Gauche : image de départ ; Droite : IFFT(FFT(image))");
image.~CImg();
cufftDestroy(plan);
delete in_buf_h;
cudaFree(in_buf_d);
cudaFree(out_buf_d);
return 0;
}
I thank you for your time !
image.bmp (201 KB)