Hi,
I tried to compute an FFT along the rows of a 2D array using cufftPlanMany(). The array is stored in column-major order. The code and its output are pasted below. Can someone tell me which arguments I should pass to achieve this? You can assume a 4x4 matrix and hard-code the values passed to cufftPlanMany(); I just want to understand how to make this work. At the moment, cufftPlanMany() seems to be working only on the first row.
#include <stdio.h>
#include <cufft.h>
typedef cuFloatComplex cF;
/*
 * CUDA(call): evaluate a CUDA runtime API call exactly once and terminate the
 * program if it did not return cudaSuccess.
 *
 * On failure the source file, line number, the runtime's error string and the
 * numeric error code are written to stderr (newline-terminated so the message
 * does not run into subsequent output), then the process exits with -1.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define CUDA(call) do {                                              \
    cudaError_t _e = (call);                                         \
    if (_e != cudaSuccess) {                                         \
        fprintf(stderr, __FILE__ ":%d: cuda failure: %s (%d)\n",     \
                __LINE__, cudaGetErrorString(_e), (int)_e);          \
        exit(-1);                                                    \
    }                                                                \
} while (0)
/*
 * CUFFT(call): evaluate a cuFFT API call exactly once and terminate the
 * program if it did not return CUFFT_SUCCESS.
 *
 * On failure the source file, line number and the numeric cuFFT status code
 * are written to stderr (newline-terminated), then the process exits with -1.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define CUFFT(call) do {                                             \
    cufftResult_t _e = (call);                                       \
    if (_e != CUFFT_SUCCESS) {                                       \
        fprintf(stderr, __FILE__ ":%d: cufft failure: (%d)\n",       \
                __LINE__, (int)_e);                                  \
        exit(-1);                                                    \
    }                                                                \
} while (0)
void show(cF *in, int rows, int cols, char *str)
{
printf("%s=\n", str);
unsigned bytes = sizeof(cF) * rows * cols;
cF *in_h = (cF *)malloc(bytes);
CUDA(cudaMemcpy(in_h, in, bytes, cudaMemcpyDeviceToHost));
for (int i=0; i < rows; i++) {
for(int j=0; j < cols; j++) {
printf("%f %fi ", in_h[i + j* rows].x, in_h[i + j * rows].y);
}
printf("\n");
}
free(in_h);
}
int main()
{
int rows = 4;
int cols = 4;
unsigned bytes = sizeof(cF) * rows * cols;
cF *d_in, *d_out;
CUDA(cudaMalloc(&d_in , bytes));
CUDA(cudaMalloc(&d_out, bytes));
cF *h_in = (cF *)malloc(bytes);
for (int i=0; i < rows * cols; i++)
h_in[i] = make_float2((float)rand()/RAND_MAX, (float)rand()/RAND_MAX);
// Move data to device
CUDA(cudaMemcpy(d_in, h_in, bytes, cudaMemcpyHostToDevice));
// Print input
show( d_in, rows, cols, "d_in");
// Zero out output buffer
CUDA(cudaMemset(d_out, 0, bytes));
// create FFT Plan
cufftHandle plan;
int stride = rows;
int rank = cols; // Number of points
int length = cols;
// 1D FFT Plan
CUFFT(cufftPlanMany(&plan, 1, &rank, &rows, stride, sixteen,
&sixteen, stride, rows, CUFFT_C2C, 4));
// Execute forward FFT
CUFFT(cufftExecC2C(plan, (cF *)d_in, (cF *)d_out, CUFFT_FORWARD));
show( d_out, rows, cols, "d_out");
CUFFT(cufftDestroy(plan));
}
$ ./a.out
d_in=
0.394383 0.840188i 0.553970 0.277775i 0.717297 0.635712i 0.400944 0.156679i
0.798440 0.783099i 0.628871 0.477397i 0.606969 0.141603i 0.108809 0.129790i
0.197551 0.911647i 0.513401 0.364784i 0.242887 0.016301i 0.218257 0.998924i
0.768230 0.335223i 0.916195 0.952230i 0.804177 0.137232i 0.839112 0.512932i
d_out=
2.066594 1.910353i -0.201818 0.051450i 0.156766 1.041446i -0.444010 0.357502i
0.000000 0.000000i 0.000000 0.000000i 0.000000 0.000000i 0.000000 0.000000i
0.000000 0.000000i 0.000000 0.000000i 0.000000 0.000000i 0.000000 0.000000i
0.000000 0.000000i 0.000000 0.000000i 0.000000 0.000000i 0.000000 0.000000i