#include #include #include #include #include //#define NX 1024 //#define NY 1024 void sfftw_plan_dft_2d_(fftw_plan*, int*, int*, fftw_complex*, fftw_complex*, int*, int*); void sfftw_execute_(fftw_plan*); int main(){ cufftResult CR; cudaError_t cudastat; cufftHandle plan; fftw_plan plan2; cufftComplex *idata, *odata; float *in, *out, *out_GPU, *in_GPU; int x,y; in = (float*)malloc(NX*NY*2*sizeof(float)); in_GPU = (float*)malloc(NX*NY*2*sizeof(float)); out_GPU = (float*)malloc(NX*NY*2*sizeof(float)); out = (float*)malloc(NX*NY*2*sizeof(float)); cudaMalloc((void**)&idata, sizeof(cufftComplex)*NX*NY); cudaMalloc((void**)&odata, sizeof(cufftComplex)*NX*NY); cudaMemset(idata,999999,sizeof(cufftComplex)*NX*NY); cudaMemset(odata,999999,sizeof(cufftComplex)*NX*NY); for(y=0;y 1.001*fabs(out_GPU[x+y*NX]) || fabs(out[x+y*NX]) < 0.999*fabs(out_GPU[x+y*NX])) //printf("%f %f\n",out[x+y*NX],out_GPU[x+y*NX]); } } float tmp=0,max=0; for(y=0;y max){ max = fabs(out[x+y*NX]-out_GPU[x+y*NX]); printf("%d %d %f %f %f\n",x,y,out[x+y*NX],out_GPU[x+y*NX],max); } } } tmp = tmp/(NX*NY); printf("tmp = %f, max = %f\n",tmp,max); /****************************************************************************************************************************/ cudastat = cudaMemcpy(in_GPU,idata,NX*NY*sizeof(cufftComplex),cudaMemcpyDeviceToHost); if(cudastat != cudaSuccess){ printf("erreur de lecture sur la carte, erreur %d\n", cudastat); exit(-1); } for(y=0;y