Hello everybody,
I am trying to run a 2D complex-to-complex cuFFT on an NVIDIA K40c with 12 GB of memory. However, there is a problem with cufftPlan2d for some sizes. For instance, for a size of X=Y=22912, it fails with a CUFFT_ALLOC_FAILED error. It is noteworthy that the same call works fine with the larger size of X=Y=23040. That is, the error does not appear to be caused by insufficient memory or a memory leak.
I tried to estimate the memory required to perform an FFT on a 2D matrix of 22912 × 22912 using cufftEstimate2d(). Surprisingly, it returns a work-area size of zero, whereas the result for 23040 × 23040 is around 3.5 GB.
The code is given below.
Regards,
Hamidreza
==================================================
void fft_kernel_gpu_single_fit(const int M, const int N, cufftComplex *A, int tSize)
{
printf("fft_kernel_gpu_single_fit: M %d N %d\n", M, N);
cufftComplex *gpudata;
if (cudaMalloc((void**)&gpudata, tSize * sizeof(cufftComplex)) != CUFFT_SUCCESS)
{
fprintf(stderr, "cudaMalloc Error: Unable to alloc memory\n");
return;
}
if (cudaMemcpy(gpudata, A, tSize * sizeof(cufftComplex), cudaMemcpyHostToDevice) != CUFFT_SUCCESS)
{
fprintf(stderr, "cudaMemcpy Error: Unable to copy data from host to gpu\n");
return;
}
cufftHandle plan;
/*int n[2] = {M, N};
if (cufftPlanMany(&plan, 2, n, NULL, 1, 0, NULL, 1, 0, CUFFT_C2C, 1) != CUFFT_SUCCESS)
{
fprintf(stderr, "CUFFT Error: Unable to create plan\n");
return;
}*/
if (cufftPlan2d(&plan, M, N, CUFFT_C2C) != CUFFT_SUCCESS)
{
fprintf(stderr, "CUFFT Error: Unable to create plan\n");
return;
}
if (cufftExecC2C(plan, gpudata, gpudata, CUFFT_FORWARD) != CUFFT_SUCCESS)
{
fprintf(stderr, "CUFFT Error: Unable to execute plan\n");
return;
}
if (cudaDeviceSynchronize() != cudaSuccess)
{
fprintf(stderr, "Cuda Error: Failed to synchronize\n");
return;
}
if (cudaMemcpy(A, gpudata, tSize * sizeof(cufftComplex), cudaMemcpyDeviceToHost) != CUFFT_SUCCESS)
{
fprintf(stderr, "cudaMemcpy Error: Unable to copy data from gpu to host\n");
return;
}
if (cudaDeviceSynchronize() != cudaSuccess)
{
fprintf(stderr, "Cuda Error: Failed to synchronize\n");
return;
}
cufftDestroy(plan);
cudaFree(gpudata);