cuFFT and Multiple GPUs

Hi everybody,

i’m new in programming with cuda and in the moment i just write little programs to get a feeling how cuda works. Currently i have a problem with the cufft and multiple gpu. I want simply to use more gpus if possible, therefore i wrote this little program

#include <complex>
#include <math.h>
#include <fstream>
#include <iostream>
#include <cufft.h>
#include <cuda_runtime_api.h>

#define cudaCheckErrors(msg) \
    do { \
        cudaError_t __err = cudaGetLastError(); \
        if (__err != cudaSuccess) { \
            fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
                msg, cudaGetErrorString(__err), \
                __FILE__, __LINE__); \
            fprintf(stderr, "*** FAILED - ABORTING\n"); \
            exit(1); \
        } \
    } while (0)

typedef std::complex<double> Complex;
using namespace std;

int main(){
  int n = 10;
  int deviceCount;
  cufftResult res;
  cufftHandle plan;

  Complex *in;
  in = (Complex*) malloc(sizeof(Complex) * n);

  for(int i = 0; i < n; i++){
    in[i].real(1);
    in[i].imag(0);
  }

  for(int i=0; i<n; i++){
    cout << "in: " << i << " "  << in[i] << endl;
  }

  cudaGetDeviceCount(&deviceCount);
 
  if(deviceCount == 1){ //normal fft
  } else { //multi gpu fft
    res = cufftCreate(&plan);
        if (res != CUFFT_SUCCESS) { printf ("*Create failed\n"); exit(1); }

        int nGPUs = 2, whichGPUs[2];
        whichGPUs[0] = 0; whichGPUs[1] = 1;
        res = cufftXtSetGPUs(plan, nGPUs, whichGPUs);
        if (res != CUFFT_SUCCESS) { printf ("*XtSetGPUs failed\n"); exit(1); }

        size_t worksize[2];

    res = cufftMakePlan1d(plan, n, CUFFT_Z2Z, 1, worksize);
        if (res != CUFFT_SUCCESS) { printf ("*MakePlan* failed\n"); exit(1); }

       cudaLibXtDesc *device_data_input;
        res = cufftXtMalloc(plan, (void*)&device_data_input, CUFFT_XT_FORMAT_INPLACE);
        if (res != CUFFT_SUCCESS) { printf ("*XtMalloc failed\n"); exit(1); }

        res = cufftXtMemcpy(plan, device_data_input,  in, CUFFT_COPY_HOST_TO_DEVICE);
        if (res != CUFFT_SUCCESS) { printf ("*XtMemcpy failed\n"); exit(1); }

    res = cufftXtExecDescriptorZ2Z(plan, device_data_input, device_data_input, CUFFT_FORWARD);
        if (res != CUFFT_SUCCESS) { printf ("*XtExec* failed\n"); exit(1); }

    res = cufftXtMemcpy(plan, in, device_data_input, CUFFT_COPY_DEVICE_TO_HOST);
        if (res != CUFFT_SUCCESS) { printf ("*XtMemcpy failed\n"); exit(1); }

        res = cufftXtFree(device_data_input);
        if (res != CUFFT_SUCCESS) { printf ("*XtFree failed\n"); exit(1); }

        res = cufftDestroy(plan);
        if (res != CUFFT_SUCCESS) { printf ("*Destroy failed: code\n"); exit(1); }
  }

  for(int i=0; i<n; i++){
    cout << "out: " << i << " "  << in[i] << endl;
  }

  free(in);

return 0;
}

the normal gpu works, so i left this part out, the multi gpu part is taken from the documentation http://docs.nvidia.com/cuda/cufft/#threed-complex-to-complex-xt-transforms. To compile i take this command nvcc multiGPU.cu -lcufft -I/usr/local/cuda/include -o multiGPU.out -arch=sm_13, but the compiler keeps complaining that all the Xt Functions are undefined. I have allready tried to use different arch parameters but the code will not compile. Am i missing some include or what is my mistake? Is the amount of used gpus limited to two oder can i use as many as available?

Best Regards and thank you

Take another look at the documentation:

http://docs.nvidia.com/cuda/cufft/index.html#accessing-cufft

#include <cufftXt.h>

regarding your question about how many GPUs are supported, it’s also in the documentation:

http://docs.nvidia.com/cuda/cufft/index.html#multiple-GPU-cufft-supported-functionality

(max of 2 GPUs)

Thanks for your fast answer, hopefully the code will now work as intended i will test it tomorrow.