cuFFT handle in C Struct

Please Help,

I have been going around with this issue for a couple of days and can’t seem to understand why it is not working. I have a C-code struct which has a cufftHandle variable (fftHandle) and is defined in a separate C file (defines.cu). However when I try to access the fftHandle variable from a different C file it doesn’t work. There are no compile errors, but it doesn’t seem to work - i.e., after calling functions declared in my header file (defines.cuh) from another C file (other.cu) the values returned from GPU are always zero.

Can anyone offer any help?

The code follows:

defines.cuh

// includes for cuFFT here

// Wrapper struct whose only member is a cuFFT plan handle, so the handle can
// be passed by pointer to functions defined in other .cu files.
typedef struct{
  cufftHandle fftHandle;  // plan handle; initFFT is meant to fill this in
} myFFT;
// Intended to create a plan for an n-point transform and keep it in *fft.
void initFFT(myFFT *fft, const int n);
// Runs the transform from d_in to d_out using the handle stored in *fft.
void execFFT(myFFT *fft, cuComplex *d_in, cuComplex *d_out);

defines.cu

#include "defines.cuh"

// Intended to create a 1-D complex-to-complex (CUFFT_C2C) plan of size n and
// store its handle in the caller's struct.
void initFFT(myFFT *fft, const int n){
  // NOTE(review): the parameter is named `fft`, but this line dereferences
  // `cufft`, which is not declared anywhere in the code shown.
  // NOTE(review): the status returned by cufftPlan1d is discarded.
  cufftPlan1d(&cufft->fftHandle, n, CUFFT_C2C, 1);
}
// Forward complex-to-complex transform from d_in into d_out, driven by the
// plan handle stored in *fft. The cuFFT status is not inspected.
void execFFT(myFFT *fft, cuComplex *d_in, cuComplex *d_out){
  const cufftHandle plan = fft->fftHandle;
  cufftExecC2C(plan, d_in, d_out, CUFFT_FORWARD);
}

other file: other.cu

#include "defines.cuh"

...
// Wrapper that is meant to receive the cuFFT plan handle from initFFT.
myFFT myfft;
// Number of complex samples in every buffer below.
const int n = 10;

// DEVICE: input and output buffers, n complex values each.
// NOTE(review): the cudaMalloc return codes are not checked.
cuComplex *d_in;
cuComplex *d_out;
cudaMalloc((void**)&d_in, n*sizeof(cuComplex));
cudaMalloc((void**)&d_out, n*sizeof(cuComplex));

// HOST: staging buffers of the same size as the device buffers
cuComplex *h_in;
cuComplex *h_out;
h_in = (cuComplex*)malloc(n*sizeof(cuComplex));
h_out = (cuComplex*)malloc(n*sizeof(cuComplex));

// load some data into HOST array h_in, e.g. random numbers [1-10]

// copy to DEVICE
cudaMemcpy(d_in, h_in, n*sizeof(cuComplex), cudaMemcpyHostToDevice);

// create a plan
initFFT(&myfft, n);

// execute
execFFT(&myfft, d_in, d_out);

// get result from DEVICE
// NOTE(review): as written, there is no comma between the size argument and
// cudaMemcpyDeviceToHost in the call below.
cudaMemcpy(h_out, d_out, n*sizeof(cuComplex) cudaMemcpyDeviceToHost);

// at this stage h_out always has all zeros

I doubt this is the code you are actually using (especially initFFT). So it’s easier for others if you do a better job of representing what you are doing.

I would also recommend doing proper error checking throughout, both CUFFT and CUDA runtime API calls.

Thank you for the response.

The code I showed in my post is not complete; I just wanted to show the important part(s). However, the initFFT function is the same. It’s supposed to be a simple FFT FORWARD code that calls CUFFT operations “under the hood” - but the driver.cu output is always a set of zeros.

I guess the main problem I am having is how to create a single struct with a cufftHandle defined within that can be used by any number of .cu files. Am I defining the struct for this type of situation incorrectly? I am not a C expert.

I have included the complete code below:

in myfft.cuh:

// Include guard: without it, including this header twice in one translation
// unit redefines the myFFT struct and fails to compile.
#ifndef MYFFT_CUH
#define MYFFT_CUH

// Standard C
#include <stdio.h>
#include <stdlib.h>

// CUDA
#include <cuda.h>
#include <cufft.h>
#include <cuComplex.h>

// Wrapper struct that carries a cuFFT plan handle so it can be shared, by
// pointer, between functions living in different .cu files.
typedef struct{
  cufftHandle fftHandle;  // plan handle filled in by initFFT
} myFFT;

// Functions operating on the struct so that the underlying cuFFT library
// can be employed.
// initFFT: create a plan for an n-point transform and keep it in *fft.
void initFFT(myFFT *fft, const int n);
// execFFT: run the transform from d_in to d_out using the plan in *fft.
void execFFT(myFFT *fft, cuComplex *d_in, cuComplex *d_out);

#endif // MYFFT_CUH

in myfft.cu:

#include "myfft.cuh"

// Intended to create a 1-D complex-to-complex (CUFFT_C2C) plan of size n and
// store its handle in the caller's struct.
void initFFT(myFFT *fft, const int n){
  // NOTE(review): the parameter is named `fft`, but this line dereferences
  // `cufft`, which is not declared anywhere in the code shown.
  // NOTE(review): the status returned by cufftPlan1d is discarded.
  cufftPlan1d(&cufft->fftHandle, n, CUFFT_C2C, 1);
}

// Runs a forward complex-to-complex transform from d_in to d_out using the
// plan handle stored in *fft.
//
// The function returns void, so a cuFFT failure cannot be handed back to the
// caller; it is reported on stderr instead of being silently dropped.
void execFFT(myFFT *fft, cuComplex *d_in, cuComplex *d_out){
  const cufftResult status = cufftExecC2C(fft->fftHandle, d_in, d_out, CUFFT_FORWARD);
  if (status != CUFFT_SUCCESS){
    fprintf(stderr, "execFFT: cufftExecC2C failed (cufftResult %d)\n", (int)status);
  }
}

in driver.cu

#include <stdio.h>
#include <stdlib.h>

#include "myfft.cuh"

// Driver: fills a host array, copies it to the device, asks initFFT/execFFT
// to transform it, copies the result back and prints the .x component of
// every output element.
// NOTE(review): none of the CUDA runtime return codes below are checked, and
// the plan handle is never released in the code shown.
int main(int argc, char **argv){

int i = 0;

// defined structure with cufftHandle in it
myFFT myfft;

// size of array
const int n = 100;

// DEVICE memory: input and output buffers, n complex values each
cuComplex *d_in;
cuComplex *d_out;
cudaMalloc((void**)&d_in, n*sizeof(cuComplex));
cudaMalloc((void**)&d_out, n*sizeof(cuComplex));

// HOST memory: staging buffers of the same size
cuComplex *h_in;
cuComplex *h_out;
h_in = (cuComplex*)malloc(n*sizeof(cuComplex));
h_out = (cuComplex*)malloc(n*sizeof(cuComplex));

// load some data into HOST array h_in: .x ramps 1..n, .y is constant 1
for(i = 0; i < n; ++i){
  h_in[i].x = i + 1;
  h_in[i].y = 1;
}

// copy to DEVICE
cudaMemcpy(d_in, h_in, n*sizeof(cuComplex), cudaMemcpyHostToDevice);

// create a plan
initFFT(&myfft, n);

// execute
execFFT(&myfft, d_in, d_out);

// get result from DEVICE
// NOTE(review): as written, there is no comma between the size argument and
// cudaMemcpyDeviceToHost in the call below.
cudaMemcpy(h_out, d_out, n*sizeof(cuComplex) cudaMemcpyDeviceToHost);

// at this stage h_out always has all zeros - only checking REALS
for(i = 0; i < n; ++i){
  printf("%f\n", h_out[i].x);
}

// release host buffers, then device buffers
free(h_in);
free(h_out);
cudaFree(d_in);
cudaFree(d_out);

return 0;
}

This cannot possibly work, it should not even compile:

void initFFT(myFFT *fft, const int n){
  cufftPlan1d(&cufft->fftHandle, n, CUFFT_C2C, 1);
               ^^^^^

This also won’t compile:

cudaMemcpy(h_out, d_out, n*sizeof(cuComplex) cudaMemcpyDeviceToHost);

So if you claim to be compiling and running this code, I don’t believe you.

Here’s an example that seems to work for me:

$ cat defines.cuh
// Include guard: without it, including this header twice in one translation
// unit redefines the myFFT struct and fails to compile.
#ifndef DEFINES_CUH
#define DEFINES_CUH

#include <cufft.h>

// Wrapper struct that carries a cuFFT plan handle so it can be shared, by
// pointer, between functions defined in different .cu files.
typedef struct{
  cufftHandle fftHandle;  // plan handle filled in by initFFT
} myFFT;
// Creates a plan for an n-point complex-to-complex transform and stores its
// handle in *fft.
void initFFT(myFFT *fft, const int n);
// Runs the forward transform from d_in to d_out using the plan in *fft.
void execFFT(myFFT *fft, cuComplex *d_in, cuComplex *d_out);

#endif // DEFINES_CUH

$ cat defines.cu
#include "defines.cuh"

#include <stdio.h>

// Creates a 1-D complex-to-complex (CUFFT_C2C) plan of length n with a batch
// count of 1 and stores its handle in fft->fftHandle.
//
// The function returns void, so a cuFFT failure cannot be handed back to the
// caller; it is reported on stderr instead of being silently dropped.
void initFFT(myFFT *fft, const int n){
  const cufftResult status = cufftPlan1d(&fft->fftHandle, n, CUFFT_C2C, 1);
  if (status != CUFFT_SUCCESS){
    fprintf(stderr, "initFFT: cufftPlan1d failed (cufftResult %d)\n", (int)status);
  }
}
// Runs a forward complex-to-complex transform from d_in to d_out using the
// plan handle stored in *fft.
//
// The function returns void, so a cuFFT failure cannot be handed back to the
// caller; it is reported on stderr instead of being silently dropped.
void execFFT(myFFT *fft, cuComplex *d_in, cuComplex *d_out){
  const cufftResult status = cufftExecC2C(fft->fftHandle, d_in, d_out, CUFFT_FORWARD);
  if (status != CUFFT_SUCCESS){
    fprintf(stderr, "execFFT: cufftExecC2C failed (cufftResult %d)\n", (int)status);
  }
}
$ cat other.cu
#include "defines.cuh"
#include <cuComplex.h>
#include <iostream>
// Reports a failed CUDA runtime call on stderr.
// Returns true when err is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t err, const char *what){
  if (err == cudaSuccess) return true;
  std::cerr << what << " failed: " << cudaGetErrorString(err) << std::endl;
  return false;
}

// Demo driver: transforms n complex samples through initFFT/execFFT and
// prints the .x component of every output element.
// Every CUDA runtime call is checked, and all resources (plan, device and
// host buffers) are released before returning.
// Returns 0 on success and 1 if any step failed.
int main(){
  myFFT myfft;
  myfft.fftHandle = 0;  // defined value in case plan creation fails
  const int n = 10;
  const size_t bytes = n*sizeof(cuComplex);

  // DEVICE
  cuComplex *d_in = NULL;
  cuComplex *d_out = NULL;

  // HOST
  cuComplex *h_in = (cuComplex*)malloc(bytes);
  cuComplex *h_out = (cuComplex*)malloc(bytes);

  bool ok = (h_in != NULL) && (h_out != NULL);
  if (!ok) std::cerr << "host allocation failed" << std::endl;
  ok = ok && cudaOk(cudaMalloc((void**)&d_in, bytes), "cudaMalloc(d_in)");
  ok = ok && cudaOk(cudaMalloc((void**)&d_out, bytes), "cudaMalloc(d_out)");

  if (ok){
    // load some data into HOST array h_in
    for (int i = 0; i < n; i++){
      h_in[i].x = 1.0f;
      h_in[i].y = (float)i;
    }
    // copy to DEVICE
    ok = cudaOk(cudaMemcpy(d_in, h_in, bytes, cudaMemcpyHostToDevice),
                "cudaMemcpy(host to device)");
  }

  if (ok){
    // create a plan
    initFFT(&myfft, n);

    // execute
    execFFT(&myfft, d_in, d_out);

    // get result from DEVICE
    ok = cudaOk(cudaMemcpy(h_out, d_out, bytes, cudaMemcpyDeviceToHost),
                "cudaMemcpy(device to host)");

    // release the plan; initFFT returns void, so a failure here may just
    // mean the plan was never created
    if (cufftDestroy(myfft.fftHandle) != CUFFT_SUCCESS){
      std::cerr << "cufftDestroy failed" << std::endl;
    }
  }

  if (ok){
    // loop over n rather than a hard-coded 10 so the output tracks the size
    for (int i = 0; i < n; i++)
      std::cout << h_out[i].x << std::endl;
  }

  // cudaFree and free both accept null pointers, so this is safe on every path
  cudaFree(d_in);
  cudaFree(d_out);
  free(h_in);
  free(h_out);

  return ok ? 0 : 1;
}
$ nvcc -o test other.cu defines.cu -lcufft

$ ./test
10
-15.3884
-6.88191
-3.63271
-1.6246
0
1.6246
3.63271
6.88191
15.3884

Okay, sorry. The code I posted had typos when I was writing from memory - I didn’t have the code in front of me.

So there is nothing inherently wrong with the way I am using struct and cufftHandle with functions (as per your code)? Interesting, given your code works there must be something else I am doing incorrectly.

Thank you for your help.