I’m trying to compute the inverse Fourier transform using a 2D cuFFT plan on a Tesla K80 card, but when I compare the results from the GPU and the CPU, some of the array values I get from the GPU are multiplied by -1, while the absolute values of the array look the same as the result I get from the CPU. I have spent a lot of time looking for a way to solve this problem; if someone can help me with this, I would appreciate it. You can see the code below.
// In-place inverse 2D FFT (double-precision complex-to-complex) on the GPU.
//
// Reads the real/imaginary parts from R and I, runs the inverse transform k
// times in place on the device, and writes the result back into R and I.
// R, I, m, n and k come from the enclosing scope; R and I are indexed with
// IDX2C(i, j, m) for 0 <= i < m and 0 <= j < n.
//
// Returns 0 on success and 1 if any allocation, copy or cuFFT call fails; on
// failure R and I are left unmodified.
//
// NOTE: cuFFT transforms are unnormalized. To match a CPU inverse FFT that
// scales by 1/(m*n) (e.g. MATLAB ifft2), the caller must apply that factor.
// NOTE(review): if only the signs of some elements differ from the CPU result
// (same magnitudes), check that both sides use the same fftshift/ifftshift
// convention -- a half-size shift of the input multiplies the output by
// (-1)^(i+j). TODO confirm against the CPU reference.
int ifft_status = 0;      // 0 = OK, 1 = some step failed
int plan_created = 0;     // tracks whether cufftDestroy must be called
cufftHandle plan;
cufftDoubleComplex* a = NULL;    // host staging buffer (interleaved complex)
cufftDoubleComplex* d_a = NULL;  // device buffer, transformed in place

// Compute the byte count in size_t so large matrices do not overflow int or
// lose precision in a floating-point size.
const size_t elem_count = (size_t)m * (size_t)n;
const size_t mem_size = elem_count * sizeof(cufftDoubleComplex);

// Create a complex variable on host
a = (cufftDoubleComplex*)malloc(mem_size);
if (a == NULL) {
    ifft_status = 1;
}

// Transfer the split real/imaginary matrices into the interleaved complex buffer
if (ifft_status == 0) {
    for (int j = 0; j < n; j++) {
        for (int i = 0; i < m; i++) {
            a[IDX2C(i, j, m)].x = R[IDX2C(i, j, m)];
            a[IDX2C(i, j, m)].y = I[IDX2C(i, j, m)];
        }
    }
}

// Allocate device memory for the signal
if (ifft_status == 0 && cudaMalloc((void**)&d_a, mem_size) != cudaSuccess) {
    d_a = NULL;
    ifft_status = 1;
}

// Copy the data from host to device
if (ifft_status == 0 &&
    cudaMemcpy(d_a, a, mem_size, cudaMemcpyHostToDevice) != cudaSuccess) {
    ifft_status = 1;
}

// Create a plan.
// cufftPlan2d takes the slowest-varying extent first and the contiguous
// extent second. Assuming IDX2C is the usual column-major macro
// (((j)*(ld))+(i)), index i (extent m) is contiguous and j (extent n) is
// strided, so the plan must be (n, m). Passing (m, n) is only correct when
// m == n.
if (ifft_status == 0) {
    if (cufftPlan2d(&plan, n, m, CUFFT_Z2Z) == CUFFT_SUCCESS) {
        plan_created = 1;
    } else {
        ifft_status = 1;
    }
}

// Execute the inverse FFT k times, in place on the device buffer
for (int rep = 0; ifft_status == 0 && rep < k; rep++) {
    if (cufftExecZ2Z(plan, d_a, d_a, CUFFT_INVERSE) != CUFFT_SUCCESS) {
        ifft_status = 1;
    }
}

// Copy the data from device to host (blocking copy, so it also waits for the
// transforms issued above to finish)
if (ifft_status == 0 &&
    cudaMemcpy(a, d_a, mem_size, cudaMemcpyDeviceToHost) != cudaSuccess) {
    ifft_status = 1;
}

// Transfer the interleaved complex result back into the split matrices
if (ifft_status == 0) {
    for (int j = 0; j < n; j++) {
        for (int i = 0; i < m; i++) {
            R[IDX2C(i, j, m)] = a[IDX2C(i, j, m)].x;
            I[IDX2C(i, j, m)] = a[IDX2C(i, j, m)].y;
        }
    }
}

// Destroy the cuFFT plan (only if it was actually created)
if (plan_created) {
    cufftDestroy(plan);
}
// Clean up memory; both calls accept NULL, so this is safe on every path
free(a);
cudaFree(d_a);
return ifft_status;