Hi Guys,
I created the following code:
#include <cmath>
#include <stdio.h>
#include <cufft.h>
#include <cuda_runtime_api.h>
#include <cuda_runtime.h>
void cufft_1d_r2c(float* idata, int Size, float* odata) {
// Input data in GPU memory
float *gpu_idata;
// Output data in GPU memory
cufftComplex *gpu_odata;
// Temp output in host memory
cufftComplex host_signal;
// Allocate space for the data in the GPU's memory
cudaMalloc((void**)&gpu_idata, Size*sizeof(float));
if (cudaGetLastError() != cudaSuccess){
fprintf(stderr, "Cuda error: Failed to allocate memory for gpu_idata.\n");
return;
}
// Allocate space for the calculated fourier coefficients in the GPU's memory
cudaMalloc((void**)&gpu_odata, sizeof(cufftComplex)*Size);
if (cudaGetLastError() != cudaSuccess){
fprintf(stderr, "Cuda error: Failed to allocate memory for gpu_odata.\n");
return;
}
// Copy from host memory to GPU's memory
cudaMemcpy(gpu_idata, idata, Size*sizeof(float), cudaMemcpyHostToDevice);
if (cudaGetLastError() != cudaSuccess){
fprintf(stderr, "Cuda error: Could not copy idata into gpu memory.\n");
return;
}
// Create the FFT calculation plan
cufftHandle Plan;
if(cufftPlan1d(&Plan, Size, CUFFT_R2C, 1) != CUFFT_SUCCESS) {
fprintf(stderr, "CUFFT error: Plan creation failed.\n");
return;
}
// Execute the FFT calculation plan on the GPU
if(cufftExecR2C(Plan, (cufftReal*)gpu_idata, (cufftComplex*)gpu_odata) != CUFFT_SUCCESS) {
fprintf(stderr, "CUFFT error: ExecR2C failed.\n");
return;
}
// Copy calculated fourier coefficients back to host memory
cudaMemcpy(host_signal, gpu_odata, Size*sizeof(cufftComplex), cudaMemcpyDeviceToHost);
if (cudaGetLastError() != cudaSuccess){
fprintf(stderr, "Cuda error: Could not copy gpu_odata into host memory.\n");
return;
}
// Print results
float tmp;
for (int i=0; i<Size; i++) {
printf("[%d]: %f + %fi\n", i, host_signal[i].x, host_signal[i].y);
}
if (cudaDeviceSynchronize() != cudaSuccess){
fprintf(stderr, "Cuda error: Failed to synchronize.\n");
return;
}
cudaFree(gpu_idata);
cudaFree(gpu_odata);
cufftDestroy(Plan);
return;
}
The idata parameter is just a float array, and Size is just it’s size.
If I pass in the following array:
[1,2,3,4,5,6]
I convert it to float before I pass:
cuFFT Input Data[0]: 1.000000.
cuFFT Input Data[1]: 2.000000.
cuFFT Input Data[2]: 3.000000.
cuFFT Input Data[3]: 4.000000.
cuFFT Input Data[4]: 5.000000.
cuFFT Input Data[5]: 6.000000.
I get something like this as the result of the FFT:
[0]: 21.000000 + 0.000000i
[1]: -3.000000 + 5.196153i
[2]: -3.000000 + 1.732051i
[3]: -3.000000 + 0.000000i
[4]: -4.000000 + 0.000000i
[5]: -335898466061976380462931376697054855168.000000 + -333239989787996945065453338629242880000.000000i
But it should be:
[0]: 21.000000 + 0.000000i
[1]: -3.000000 + 5.196153i
[2]: -3.000000 + 1.732051i
[3]: -3.000000 + 0.000000i
[4]: -3.0000 - 1.7321i
[5]: -3.0000 - 5.1962i
So it seems to me the first half of the result is OK, but not the second half.
My guess is that there might be something to do with the data sizes in the memory.
Any help would be really appreciated.
Zoltan