I have a cufftReal array, which I initialize to 1.0
g = (cufftComplex*) malloc(sizeof(cufftComplex)Nx(Ny/2 + 1)Nz);
gt = (cufftReal) malloc(sizeof(cufftReal)NxNyNz);
cudaMalloc((void**) &g_d, sizeof(cufftComplex)Nx(Ny/2 + 1)Nz);
cudaMalloc((void*) >_d, sizeof(cufftReal)NxNyNz);
for(int k=0; k<Nz; k++) {
for(int j=0; j<Nx; j++) {
for(int i=0; i<Ny; i++) {
index = i + Ny*j + Ny*Nx*k;
gt[index] = 1.0 ;
}
}
}
I then create a plan –
cufftHandle pbkd;
cufftPlan3d(&pbkd, Nz, Nx, Ny, CUFFT_R2C);
Transfer the array to device memory and take a fourier transform;
cudaMemcpy(gt_d, gt, sizeof(cufftReal)NxNy*Nz, cudaMemcpyHostToDevice);
cufftExecR2C(pbkd, gt_d, g_d)
I move g_d to the host and print out the result,
cudaMemcpy(g, g_d, sizeof(cufftComplex)Nx(Ny/2 + 1)*Nz, cudaMemcpyDeviceToHost);
Depending on the resolution I expect a finite value for g[0][0][0]; and 0 for everything else. But for some reason, this is the output I get which I fail to understand –
g[0, 0, 0] = 64.000000, -0.000000
g[0, 0, 3] = 1.000000, 1.000000
g[0, 1, 3] = 1.000000, 1.000000
g[0, 2, 3] = 1.000000, 1.000000
g[1, 0, 3] = 1.000000, 1.000000
g[1, 1, 3] = 1.000000, 1.000000
g[1, 2, 3] = 1.000000, 1.000000
g[2, 0, 3] = 1.000000, 1.000000
g[2, 1, 3] = 1.000000, 1.000000
g[2, 2, 3] = 1.000000, 1.000000
g[3, 0, 3] = 1.000000, 1.000000
g[3, 1, 3] = 1.000000, 1.000000
g[3, 2, 3] = 1.000000, 1.000000