luistm
1
Hi.
I’m tring to use CUFFT to compute the complex fourier transform of some data, but the results are wrong.
Can anyone see a problem on the following code?
void k_CUDA_FFT(float **data_pointers, int* args){
int N = args[0];
// allocate space on device
float2* d_InputSignal;
cudaMalloc((void**)&d_InputSignal, N*sizeof(float2));
// copy data from host to device
float2* h_InputSignal = new float2[N];
for(int i=0; i< N; i++){
h_InputSignal[i].x=data_pointers[0][i];
h_InputSignal[i].y=data_pointers[1][i];
}
cudaMemcpy(d_InputSignal, h_InputSignal,N*sizeof(float2), cudaMemcpyHostToDevice );
// CUFFT plan
cufftHandle plan;
int size = sizeof(float2) * N;
cufftPlan1d(&plan, size, CUFFT_C2C, 1);
/* executes FFT processes */
if(args[1]==1)
cufftExecC2C(plan, (cufftComplex *)d_InputSignal, (cufftComplex *)d_InputSignal, CUFFT_FORWARD);
//else
// cufftExecC2C(plan, (cufftComplex *)d_InputSignal, (cufftComplex *)d_InputSignal, CUFFT_INVERSE);
// copy data from device to host
cudaMemcpy(h_InputSignal,d_InputSignal,N*sizeof(float2),cudaMemcpyDeviceToHost);
for(int i=0; i< N; i++){
data_pointers[0][i] = h_InputSignal[i].x;
data_pointers[1][i] = h_InputSignal[i].y;
}
cufftDestroy(plan);
cudaFree(h_InputSignal);
cudaFree(d_InputSignal);
}
Input:
REAL:0.841471 0.909297 0.141120 -0.756802 -0.958924 -0.279415 0.656987 0.989358
IMAG:0.841471 0.909297 0.141120 -0.756802 -0.958924 -0.279415 0.656987 0.989358
The correct result should be :
REAL: 1.543091 0.799255 0.427607 0.136506 -0.181784 -0.686254 -2.258727 6.952074
IMAG: 0.332754 0.552790 0.662729 0.748840 0.842995 0.992223 1.457381 -1.267293
The wrong result is:
REAL: 1.543091 1.803324 1.575109 1.012566 0.405025 0.030154 0.028103 0.350289
IMAG: 0.332754 -0.238005 -0.809705 -1.081805 -0.935108 -0.462346 0.098292 0.488494
Thanks
luistm
2
Hi again!
The problem is in “cufftPlan1d(&plan, size, CUFFT_C2C, 1);”.
Size should be the number of points of the FFT.
Thanks
The problem should be in your call:
cufftPlan1d(&plan, size, CUFFT_C2C, 1);
size of the transform is in element, you are passing the number of bytes.
Try:
cufftPlan1d(&plan, N, CUFFT_C2C, 1);
luistm
4
Now i have a problem with this code:
int N = args[0];
// allocate space on device
float2* d_InputSignal;
printf("Before cudaMalloc call\n");fflush(stdout);
cudaMalloc((void**)&d_InputSignal, N*N*sizeof(float2));
// copy data from host to device
float2* h_InputSignal = new float2[N];
for(int i=0; i< N*N; i++){
h_InputSignal[i].x=data_pointers[0][i];
h_InputSignal[i].y=data_pointers[1][i];
}
printf("Before cudaMemcpy call\n");fflush(stdout);
cudaMemcpy(d_InputSignal, h_InputSignal,N*N*sizeof(float2), cudaMemcpyHostToDevice );
// CUFFT plan
cufftHandle plan;
int size = N;
printf("Before cudaPlan call\n");fflush(stdout);
cufftPlan2d(&plan, size, size, CUFFT_C2C);
printf("cudaPlane return\n");fflush(stdout);
/* executes FFT processes */
printf("Before cufft call\n");fflush(stdout);
if(args[1]==1){
cufftExecC2C(plan, (cufftComplex *)d_InputSignal, (cufftComplex *)d_InputSignal, CUFFT_FORWARD);
}
else{
cufftExecC2C(plan, (cufftComplex *)d_InputSignal, (cufftComplex *)d_InputSignal, CUFFT_INVERSE);
}
// copy data from device to host
cudaMemcpy(h_InputSignal,d_InputSignal,N*N*sizeof(float2),cudaMemcpyDeviceToHost);
for(int i=0; i< N*N; i++){
data_pointers[0][i] = h_InputSignal[i].x;
data_pointers[1][i] = h_InputSignal[i].y;
}
It gives an error in cudaPlan2d:
Before cudaPlan call
cudaFft: malloc.c:3096: sYSMALLOc: Assertion `(old_top == (((mbinptr) (((char *) &((av)->bins[((1) - 1) * 2])) - __builtin_offsetof (struct malloc_chunk, fd)))) && old_size == 0) || ((unsigned long) (old_size) >= (unsigned long)((((__builtin_offsetof (struct malloc_chunk, fd_nextsize))+((2 * (sizeof(size_t))) - 1)) & ~((2 * (sizeof(size_t))) - 1))) && ((old_top)->size & 0x1) && ((unsigned long)old_end & pagemask) == 0)' failed.
luistm
5
The problem was here:
float2* h_InputSignal = new float2[N];
It should be
float2* h_InputSignal = new float2[N*N];