Hello,
I know this is an old post, anyway, I’ll give it a try.
I can run this test (t734.cu) as it is; however, when I try to modify it splitting it into functions or files I cannot make it run. I have 3 functions/files, including main, allocates and exec.
Firstly, I declare the : plan
, *d_idata, *d_odata
& *h_odata
as globals. I changed A to be dynamic and in the main function allocate and initialize it.
Secondly, allocates is called, to allocate/initialize all the global variables and creating the plan.
Finally, exec is called to execute the R2C fft.
Unfortunately, I got illegal memory access error and out of bounds when using compute-sanitizer.
Here is the complete minimal example using functions, but same error I got using files.
#include <cufft.h>
#include <stdio.h>
cufftHandle plan;
cufftReal *d_idata=NULL;
cufftComplex *h_odata=NULL, *d_odata=NULL;
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
void Print2DComplex(int rows, int cols, cufftComplex *data, bool cufft_symmetry = false){
int sym_cols = cols;
if (cufft_symmetry) sym_cols = cols/2 + 1;
printf("Real Part: \n");
for (int i = 0; i < rows; i++){
for (int j = 0; j < cols; j++)
if (j>=sym_cols)
printf("%f ", data[i*sym_cols+(cols-j)].x);
else
printf("%f ", data[i*sym_cols+j].x);
printf("\n");}
printf("Imag Part: \n");
for (int i = 0; i < rows; i++){
for (int j = 0; j < cols; j++)
if (j>=sym_cols)
printf("%f ", -data[i*sym_cols+(cols-j)].y); // complex (hermitian) symmetry
else
printf("%f ", data[i*sym_cols+j].y);
printf("\n");}
}
void allocates(cufftHandle *plan, cufftReal *d_idata, cufftComplex *h_odata, cufftComplex *d_odata, const int row, const int col, float *A)
{
gpuErrchk(cudaMalloc((void**)&d_idata, sizeof(cufftComplex)*row*col));
gpuErrchk(cudaMalloc((void**)&d_odata, sizeof(cufftComplex)*row*col));
gpuErrchk(cudaMemset(d_idata, 0, sizeof(cufftComplex)*row*col));
gpuErrchk(cudaMemset(d_odata, 0, sizeof(cufftComplex)*row*col));
gpuErrchk(cudaMallocHost((void**)&h_odata, sizeof(cufftComplex)*row*col));
gpuErrchk(cudaMemcpy(d_idata,A,sizeof(cufftReal)*row*col,cudaMemcpyHostToDevice));
if ((cufftPlan2d(plan, row,col, CUFFT_R2C))!= CUFFT_SUCCESS) {printf("cufft plan error\n"); exit(-1);}
}
void exec(cufftHandle plan, cufftReal *d_idata, cufftComplex *h_odata, cufftComplex *d_odata, const int row, const int col)
{
if ((cufftExecR2C(plan, (cufftReal*)d_idata, (cufftComplex*)d_odata))!=CUFFT_SUCCESS) {printf("cufft exec error\n"); exit(-1);}
gpuErrchk(cudaDeviceSynchronize());
gpuErrchk(cudaMemcpy(h_odata,d_odata,sizeof(cufftComplex)*row*col,cudaMemcpyDeviceToHost));
gpuErrchk(cudaDeviceSynchronize());
}
int main()
{
const int row = 4;
const int col = 4;
/*
double A[row][col] =
{{ 1, 2, 3, 4},
{ 5, 6, 7, 8},
{ 9,10,11,12},
{13,14,15,16}};
*/
float *A = (float*)calloc(row*col , sizeof(float));
int j=0;
for(int i=1; i <= (row*col); i++)
A[j++] = i;
allocates(&plan, d_idata, h_odata, d_odata, row, col, A);
//bool symmetric_data = false;
exec(plan, d_idata, h_odata, d_odata, row, col);
//symmetric_data = true;
// Print2DComplex(row,col,h_odata, symmetric_data);
return 0;
}
I compiled it with: nvcc t734-cufft-R2C-functions-nvidia-forum.cu -o t734-cufft-R2C-functions-nvidia-forum -lcufft
But I got:
GPUassert: an illegal memory access was encountered t734-cufft-R2C-functions-nvidia-forum.cu 56
I tried the --device-c option compiling them when the functions were on files, without any luck.
I guess that the d_idata & d_odata aren’t visible/accessible on exec function/file, so how can I make them accessible to exec function/file?
Could you help me please.
Thanks