I perform an FFT on a matrix of size 6400*80, and the program takes about 700 ms to run. Is that normal?
Here is my code:
// Runs a batch of 1-D real-to-complex FFTs with cuFFT: `rows` independent
// transforms, each of length `cols`, then blocks until the device is idle.
//
// Parameters:
//   rows  - number of transforms in the batch (passed to cuFFT as `batch`).
//   cols  - length of every transform.
//   idata - input samples; transform r starts at idata + r * cols
//           (istride == 1, idist == cols). Passed straight to cufftExecR2C,
//           so presumably a device pointer -- confirm against the caller.
//   odata - output spectrum; transform r starts at odata + r * cols
//           (ostride == 1, odist == cols).
//           NOTE(review): per the cuFFT documentation an R2C transform of
//           length cols writes cols/2 + 1 complex values, so the remainder
//           of each cols-wide output row is presumably left unwritten --
//           confirm that the caller allocates rows * cols elements.
//
// Does nothing when rows or cols is not positive (there is nothing to
// transform, and cuFFT would reject such a plan anyway).
//
// Performance note: the plan is created and destroyed on every call, so a
// timing of this whole function includes plan creation, not just the
// transform. The cuFFT documentation recommends creating a plan once and
// reusing it for repeated transforms of the same shape.
void do_fft_r2c(const int rows, const int cols, cufftReal* idata, cufftComplex* odata)
{
    if (rows <= 0 || cols <= 0) {
        return;
    }

    // Advanced-layout description of a rank-1 batched transform. Only `rank`
    // entries of n / inembed / onembed are read, hence one element each.
    const int rank = 1;
    int n[1] = {cols};
    int inembed[1] = {cols};
    int onembed[1] = {cols};
    const int istride = 1;
    const int idist = cols;
    const int ostride = 1;
    const int odist = cols;

    // Every cuFFT status is checked so that a failed plan or execution is
    // reported instead of silently leaving odata untouched.
    // NOTE(review): assumes checkCudaErrors accepts cufftResult, as the
    // helper of that name in NVIDIA's CUDA samples does -- confirm.
    cufftHandle plan;
    checkCudaErrors(cufftPlanMany(&plan, rank, n,
                                  inembed, istride, idist,
                                  onembed, ostride, odist,
                                  CUFFT_R2C, rows));
    checkCudaErrors(cufftExecR2C(plan, idata, odata));

    // Wait for the transform to finish before the plan is released and
    // control returns to the caller.
    checkCudaErrors(cudaDeviceSynchronize());
    checkCudaErrors(cufftDestroy(plan));
}