Hi everyone,
I am working with a 2000x2000 real matrix loaded from a CSV file and I want to perform a 2-D FFT on it, equivalent to “fft2(Matrix)” in MATLAB or Scilab, but I am not able to get the same results…
// Load the real input matrix; the reader reports its dimensions through Nx / Ny.
int Nx = -1, Ny = -1;
cufftDoubleReal* champ = mCSVReader->getRealData(Nx, Ny);
// Reject the input if ANY of the three values is unusable. The previous test
// used '&&', so e.g. a null pointer with valid-looking dimensions slipped through.
if (!champ || Nx <= 0 || Ny <= 0)
{
printf("[ERROR] Invalid input matrix !\n");
return EXIT_FAILURE;
}
// Host buffer for the spectrum. The element count is computed in size_t so that
// large matrices cannot overflow an int multiplication.
cufftDoubleComplex* champFFT = (cufftDoubleComplex*) malloc((size_t)Nx * (size_t)Ny * sizeof(cufftDoubleComplex));
if (!champFFT)
{
printf("[ERROR] Unable to allocate the output matrix !\n");
return EXIT_FAILURE;
}
if (!launchFFT_R2C(champ, champFFT, Nx, Ny))
{
printf("[ERROR] Unable to perform fft() !\n");
free(champFFT); // do not leak the output buffer on the failure path
return EXIT_FAILURE;
}
printComplexMatrix(champFFT, Nx, Ny, 5, true);
I have tried with the functions below :
/**
 * Forward 2-D real-to-complex FFT in double precision (cuFFT D2Z).
 *
 * @param dataIn   Host pointer to row * col real samples, row-major
 *                 (col is the fastest-varying dimension).
 * @param dataOut  Host pointer receiving the non-redundant half-spectrum:
 *                 row * (col / 2 + 1) complex values, row-major with a row
 *                 stride of (col / 2 + 1). Elements of dataOut beyond that
 *                 count are left untouched.
 * @param col      Number of columns (fastest-varying dimension), > 0.
 * @param row      Number of rows (slowest-varying dimension), > 0.
 * @return true on success; false on invalid arguments or on any CUDA / cuFFT
 *         failure (a diagnostic is printed). All device resources are
 *         released on every path.
 *
 * The result is unnormalised, like every cuFFT transform.
 */
__host__ bool launchFFT_R2C(const cufftDoubleReal * dataIn, cufftDoubleComplex * dataOut, int col, int row)
{
    if (!dataIn || !dataOut || col <= 0 || row <= 0)
    {
        printf("[ERROR] launchFFT_R2C: invalid arguments\n");
        return false;
    }

    // D2Z only stores the first col/2 + 1 columns of each row; the remaining
    // ones follow from Hermitian symmetry.
    const size_t halfCols = (size_t)col / 2 + 1;
    const size_t inBytes  = (size_t)row * (size_t)col * sizeof(cufftDoubleReal);
    const size_t outBytes = (size_t)row * halfCols * sizeof(cufftDoubleComplex);

    cufftDoubleReal    *d_idata = NULL;
    cufftDoubleComplex *d_odata = NULL;
    cufftHandle   plan        = 0;
    bool          planCreated = false;
    cudaError_t   err         = cudaSuccess;
    cufftResult_t result      = CUFFT_SUCCESS;
    const char   *stage       = "";
    bool          ok          = false;

    // Single-exit block: any failure breaks out to the shared clean-up below.
    do
    {
        stage = "cudaMalloc(input)";
        if ((err = cudaMalloc(&d_idata, inBytes)) != cudaSuccess) break;

        stage = "cudaMalloc(output)";
        if ((err = cudaMalloc(&d_odata, outBytes)) != cudaSuccess) break;

        stage = "cudaMemcpy(host to device)";
        if ((err = cudaMemcpy(d_idata, dataIn, inBytes, cudaMemcpyHostToDevice)) != cudaSuccess) break;

        // cufftPlan2d takes the slowest-varying dimension first.
        stage = "cufftPlan2d";
        if ((result = cufftPlan2d(&plan, row, col, CUFFT_D2Z)) != CUFFT_SUCCESS) break;
        planCreated = true;

        stage = "cufftExecD2Z";
        if ((result = cufftExecD2Z(plan, d_idata, d_odata)) != CUFFT_SUCCESS) break;

        // Blocking copy: also waits for the transform to finish and reports
        // any asynchronous execution error.
        stage = "cudaMemcpy(device to host)";
        if ((err = cudaMemcpy(dataOut, d_odata, outBytes, cudaMemcpyDeviceToHost)) != cudaSuccess) break;

        ok = true;
    } while (0);

    if (!ok)
    {
        if (err != cudaSuccess)
            printf("[ERROR] launchFFT_R2C: %s failed: %s\n", stage, cudaGetErrorString(err));
        else
            printf("[ERROR] launchFFT_R2C: %s failed with cuFFT status %d\n", stage, (int)result);
    }

    if (planCreated) cufftDestroy(plan);
    cudaFree(d_odata); // cudaFree(NULL) is a harmless no-op
    cudaFree(d_idata);
    return ok;
}
/**
 * 2-D complex-to-complex FFT in double precision (cuFFT Z2Z), out of place.
 *
 * @param dataIn    Host pointer to NX * NY complex samples, row-major with NY
 *                  as the fastest-varying dimension.
 * @param dataOut   Host pointer receiving NX * NY complex values in the same
 *                  layout.
 * @param NX        Size of the slowest-varying dimension, > 0.
 * @param NY        Size of the fastest-varying dimension, > 0.
 * @param direction CUFFT_FORWARD or CUFFT_INVERSE, passed straight to
 *                  cufftExecZ2Z.
 * @return true on success; false on invalid arguments or on any CUDA / cuFFT
 *         failure (a diagnostic is printed). All device resources are
 *         released on every path.
 *
 * The result is unnormalised: a forward transform followed by an inverse one
 * scales the data by NX * NY.
 */
__host__ bool launchFFT_C2C(const cufftDoubleComplex * dataIn, cufftDoubleComplex * dataOut, int NX, int NY, int direction)
{
    if (!dataIn || !dataOut || NX <= 0 || NY <= 0)
    {
        printf("[ERROR] launchFFT_C2C: invalid arguments\n");
        return false;
    }

    const size_t bytes = (size_t)NX * (size_t)NY * sizeof(cufftDoubleComplex);

    cufftDoubleComplex *gpu_initial_array     = NULL;
    cufftDoubleComplex *gpu_transformed_array = NULL;
    cufftHandle   plan        = 0;
    bool          planCreated = false;
    cudaError_t   err         = cudaSuccess;
    cufftResult_t result      = CUFFT_SUCCESS;
    const char   *stage       = "";
    bool          ok          = false;

    // Single-exit block: any failure breaks out to the shared clean-up below.
    do
    {
        stage = "cudaMalloc(input)";
        if ((err = cudaMalloc(&gpu_initial_array, bytes)) != cudaSuccess) break;

        stage = "cudaMalloc(output)";
        if ((err = cudaMalloc(&gpu_transformed_array, bytes)) != cudaSuccess) break;

        // One plain copy uploads the interleaved (re, im) input. The extra
        // cudaMemcpy2D that used to follow treated dataIn as packed doubles
        // and overwrote the real parts just uploaded, corrupting the input.
        stage = "cudaMemcpy(host to device)";
        if ((err = cudaMemcpy(gpu_initial_array, dataIn, bytes, cudaMemcpyHostToDevice)) != cudaSuccess) break;

        stage = "cufftPlan2d";
        if ((result = cufftPlan2d(&plan, NX, NY, CUFFT_Z2Z)) != CUFFT_SUCCESS) break;
        planCreated = true;

        stage = "cufftExecZ2Z";
        if ((result = cufftExecZ2Z(plan, gpu_initial_array, gpu_transformed_array, direction)) != CUFFT_SUCCESS) break;

        // Blocking copy: also waits for the transform to finish and reports
        // any asynchronous execution error.
        stage = "cudaMemcpy(device to host)";
        if ((err = cudaMemcpy(dataOut, gpu_transformed_array, bytes, cudaMemcpyDeviceToHost)) != cudaSuccess) break;

        ok = true;
    } while (0);

    if (!ok)
    {
        if (err != cudaSuccess)
            printf("[ERROR] launchFFT_C2C: %s failed: %s\n", stage, cudaGetErrorString(err));
        else
            printf("[ERROR] launchFFT_C2C: %s failed with cuFFT status %d\n", stage, (int)result);
    }

    if (planCreated) cufftDestroy(plan);
    cudaFree(gpu_transformed_array); // cudaFree(NULL) is a harmless no-op
    cudaFree(gpu_initial_array);
    return ok;
}
And I use this one to print out my results :
/**
 * Prints a rows x cols complex matrix: first all real parts, then all
 * imaginary parts, one matrix row per output line.
 *
 * @param data            Row-major complex matrix. With cufft_symmetry it
 *                        holds only cols / 2 + 1 columns per row (the
 *                        half-spectrum written by a real-to-complex cuFFT
 *                        transform); otherwise it holds cols columns per row.
 * @param cols            Logical number of columns to print.
 * @param rows            Number of rows to print.
 * @param cufft_symmetry  When true, the columns that are not stored are
 *                        rebuilt from Hermitian symmetry.
 *
 * For a 2-D real-input transform the symmetry is
 *     X[i][j] = conj( X[(rows - i) % rows][(cols - j) % cols] ),
 * so BOTH indices are mirrored. Mirroring only the column (same row i) is
 * correct for row 0 only.
 */
void Application::printComplexMatrix(const cufftDoubleComplex* data, int cols, int rows, bool cufft_symmetry)
{
    if (!data) return;

    // Number of columns physically stored per row (also the row stride).
    const int sym_cols = cufft_symmetry ? cols / 2 + 1 : cols;

    printf("Real Part: \n");
    for (int i = 0; i < rows; i++)
    {
        const int mirrorRow = (rows - i) % rows; // row holding the conjugate partner
        for (int j = 0; j < cols; j++)
        {
            if (j >= sym_cols)
                printf("%f ", data[(size_t)mirrorRow * sym_cols + (cols - j)].x);
            else
                printf("%f ", data[(size_t)i * sym_cols + j].x);
        }
        printf("\n");
    }

    printf("Imag Part: \n");
    for (int i = 0; i < rows; i++)
    {
        const int mirrorRow = (rows - i) % rows;
        for (int j = 0; j < cols; j++)
        {
            if (j >= sym_cols)
                printf("%f ", -data[(size_t)mirrorRow * sym_cols + (cols - j)].y); // conjugate: negate the imaginary part
            else
                printf("%f ", data[(size_t)i * sym_cols + j].y);
        }
        printf("\n");
    }
}
Can someone help me with that ?
The incorrect results I get for the first 5x5 elements with the launchFFT_R2C() function:
Real Part:
7832466.053380 336112.003120 -241287.237986 156417.233556 -64705.152967
-1297.465045 36688.280115 -39401.822953 22499.600954 114.471259
-15039.533396 17345.256324 -9181.924529 -2246.558350 9686.908870
-10097.060700 4820.556155 2492.306439 -6415.139433 5468.776117
-1167.533065 -3182.003654 5014.081975 -2978.421501 -416.719322
Imag Part:
0.000000 1583.099541 -2273.031640 2210.305203 -1219.242600
-30.616769 1037.027652 -1299.636249 848.103546 4.755247
-709.002702 899.346736 -519.653047 -137.874700 639.530842
-714.718918 363.790663 199.798430 -545.387079 490.502953
-110.532166 -315.999213 521.184674 -324.179986 -47.576409
Appuyez sur une touche pour continuer...
The expected results for the first 5x5 elements (from MATLAB):
7.8284e+06+0i | 3.3606e+05+1055.8i | -2.4121e+05-1515.6i | 1.5629e+05+1473i | -64547-811.17i
3.3606e+05+1055.8i | -2.9203e+05-1834.9i | 2.2444e+05+2115.3i | -1.3986e+05-1757.6i | 55427+870.72i
-2.4121e+05-1515.6i | 2.2444e+05+2115.3i | -1.7112e+05-2150.4i | 98701+1550.5i | -30268-570.61i
1.5629e+05+1473i | -1.3986e+05-1757.6i | 98701+1550.5i | -46815-882.54i | -2167.2-47.667i
-64547-811.17i | 55427+870.72i | -30268-570.61i | -2167.2-47.667i | 28277+710.83i