Dear all,
I am using cuFFT to perform Fourier transforms and have run into a problem.
The code is shown below:
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <time.h>
#include "fftw3.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cufft.h>
// Extents of the 3D transform; passed to cufftPlan3d as (NX, NY, NZ) and
// multiplied together to size every host and device buffer in main().
#define NX 180
#define NY 230
#define NZ 190
// NOTE(review): BATCHSIZE and BLOCK_SIZE are not referenced by the code
// visible here — confirm whether they are still needed.
#define BATCHSIZE 10
#define BLOCK_SIZE 32
using std::cout;
using std::endl;
// NOTE(review): cudaErr is declared but never assigned in the visible code.
cudaError_t cudaErr;
// Filled by cudaMemGetInfo() at the start of every iteration in main().
size_t freeMem = 0;
size_t totalMem = 0;
// NOTE(review): allocMem is not used by the visible code.
size_t allocMem = 0;
int main()
{
double scale = 1.0/(NX*NY*NZ);
for (int j=0; j < 150; j++)
{
cudaMemGetInfo(&freeMem, &totalMem);
cout<<"\titeration is :"<<j<<endl;
cufftComplex *data_dev;
cufftReal *data_dev1;
cudaMalloc((void**)&data_dev, NZ * NY * NX * sizeof(cufftComplex));
cudaMalloc((void**)&data_dev1, NZ * NY * NX * sizeof(cufftReal));
cudaMemset(data_dev, 0, NZ * NY * NX * sizeof(cufftComplex));
cudaMemset(data_dev1, 0, NZ * NY * NX * sizeof(cufftReal));
cufftReal *data_Host = (cufftReal*)malloc(NZ * NY * NX * sizeof(cufftReal));
cufftComplex *resultFFT = (cufftComplex*)malloc(NZ * NY * NX * sizeof(cufftComplex));
cufftComplex *data_Host1 = (cufftComplex*)malloc(NZ * NY * NX * sizeof(cufftComplex));
cufftReal *resultIFFT = (cufftReal*)malloc(NZ * NY * NX * sizeof(cufftReal));
for (int i = 0; i < NZ*NY * NX; i++)
{
data_Host[i] = float((rand() * rand()) % NX) / NX;
}
for (int i = 0; i < 3 ; i++)
cout<<data_Host[i]<<endl;
cufftHandle plan;
cufftPlan3d(&plan, NX, NY, NZ, CUFFT_R2C);
cudaMemcpy(data_dev1, data_Host, NZ * NY * NX * sizeof(cufftReal), cudaMemcpyHostToDevice);
cufftExecR2C(plan, data_dev1, data_dev);
cudaMemcpy(resultFFT, data_dev, NZ * NY * NX * sizeof(cufftComplex), cudaMemcpyDeviceToHost);
cout<<"=======by cuda================"<<endl;
for (int i = 0; i < 10 ; i++)
cout<<resultFFT[i].x<<"+i"<<resultFFT[i].y<<endl;
for (int i = 0; i < NZ*NY * NX; i++)
{
data_Host1[i].x = resultFFT[i].x*data_Host[i];
data_Host1[i].y = resultFFT[i].y*data_Host[i];
}
cufftPlan3d(&plan, NX, NY, NZ, CUFFT_C2R);
cudaMemcpy(data_dev, data_Host1, NZ * NY * NX * sizeof(cufftComplex), cudaMemcpyHostToDevice);
cufftExecC2R(plan, data_dev, data_dev1);
cudaMemcpy(resultIFFT, data_dev1, NZ * NY * NX * sizeof(cufftReal), cudaMemcpyDeviceToHost);
cout<<"========by cuda==============="<<endl;
for (int i = 0; i < 10 ; i++)
cout<<resultIFFT[i]*scale<<endl;
cufftDestroy(plan);
cudaFree(data_dev);
cudaFree(data_dev1);
free(data_Host);
free(resultIFFT);
free(resultFFT);
}
getchar( );
return 0;
}
When the iteration count reaches 126, errors occur. The error output in VS2010 is:
combine_furier.exe 中的 0x000007fefd27a49d 处最可能的异常: Microsoft C++ 异常: 内存位置 0x001df5f0 处的 cufftResult_t。
combine_furier.exe 中的 0x000007fefd27a49d 处最可能的异常: Microsoft C++ 异常: 内存位置 0x001df5f0 处的 cufftResult_t。
combine_furier.exe 中的 0x000007fefd27a49d 处最可能的异常: Microsoft C++ 异常: 内存位置 0x001df5f0 处的 cufftResult_t。
combine_furier.exe 中的 0x000007fefd27a49d 处最可能的异常: Microsoft C++ 异常: 内存位置 0x001df5f0 处的 cufftResult_t。
combine_furier.exe 中的 0x000007fefd27a49d 处最可能的异常: Microsoft C++ 异常: 内存位置 0x001df5f0 处的 cufftResult_t。
combine_furier.exe 中的 0x000007fefd27a49d 处最可能的异常: Microsoft C++ 异常: 内存位置 0x001df5f0 处的 cufftResult_t。
How can I solve this problem? Does this mean that the function cudaFree() doesn't work?