Hi everybody,
i’m new in programming with cuda and in the moment i just write little programs to get a feeling how cuda works. Currently i have a problem with the cufft and multiple gpu. I want simply to use more gpus if possible, therefore i wrote this little program
#include <complex>
#include <math.h>
#include <fstream>
#include <iostream>
#include <cufft.h>
#include <cuda_runtime_api.h>
#define cudaCheckErrors(msg) \
do { \
cudaError_t __err = cudaGetLastError(); \
if (__err != cudaSuccess) { \
fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
msg, cudaGetErrorString(__err), \
__FILE__, __LINE__); \
fprintf(stderr, "*** FAILED - ABORTING\n"); \
exit(1); \
} \
} while (0)
typedef std::complex<double> Complex;
using namespace std;
int main(){
int n = 10;
int deviceCount;
cufftResult res;
cufftHandle plan;
Complex *in;
in = (Complex*) malloc(sizeof(Complex) * n);
for(int i = 0; i < n; i++){
in[i].real(1);
in[i].imag(0);
}
for(int i=0; i<n; i++){
cout << "in: " << i << " " << in[i] << endl;
}
cudaGetDeviceCount(&deviceCount);
if(deviceCount == 1){ //normal fft
} else { //multi gpu fft
res = cufftCreate(&plan);
if (res != CUFFT_SUCCESS) { printf ("*Create failed\n"); exit(1); }
int nGPUs = 2, whichGPUs[2];
whichGPUs[0] = 0; whichGPUs[1] = 1;
res = cufftXtSetGPUs(plan, nGPUs, whichGPUs);
if (res != CUFFT_SUCCESS) { printf ("*XtSetGPUs failed\n"); exit(1); }
size_t worksize[2];
res = cufftMakePlan1d(plan, n, CUFFT_Z2Z, 1, worksize);
if (res != CUFFT_SUCCESS) { printf ("*MakePlan* failed\n"); exit(1); }
cudaLibXtDesc *device_data_input;
res = cufftXtMalloc(plan, (void*)&device_data_input, CUFFT_XT_FORMAT_INPLACE);
if (res != CUFFT_SUCCESS) { printf ("*XtMalloc failed\n"); exit(1); }
res = cufftXtMemcpy(plan, device_data_input, in, CUFFT_COPY_HOST_TO_DEVICE);
if (res != CUFFT_SUCCESS) { printf ("*XtMemcpy failed\n"); exit(1); }
res = cufftXtExecDescriptorZ2Z(plan, device_data_input, device_data_input, CUFFT_FORWARD);
if (res != CUFFT_SUCCESS) { printf ("*XtExec* failed\n"); exit(1); }
res = cufftXtMemcpy(plan, in, device_data_input, CUFFT_COPY_DEVICE_TO_HOST);
if (res != CUFFT_SUCCESS) { printf ("*XtMemcpy failed\n"); exit(1); }
res = cufftXtFree(device_data_input);
if (res != CUFFT_SUCCESS) { printf ("*XtFree failed\n"); exit(1); }
res = cufftDestroy(plan);
if (res != CUFFT_SUCCESS) { printf ("*Destroy failed: code\n"); exit(1); }
}
for(int i=0; i<n; i++){
cout << "out: " << i << " " << in[i] << endl;
}
free(in);
return 0;
}
the normal gpu works, so i left this part out, the multi gpu part is taken from the documentation cuFFT :: CUDA Toolkit Documentation. To compile i take this command nvcc multiGPU.cu -lcufft -I/usr/local/cuda/include -o multiGPU.out -arch=sm_13, but the compiler keeps complaining that all the Xt Functions are undefined. I have allready tried to use different arch parameters but the code will not compile. Am i missing some include or what is my mistake? Is the amount of used gpus limited to two oder can i use as many as available?
Best Regards and thank you