I want to use both the GPU and the CPU from C, with MPI handling the communication between nodes. I have run into a problem; please help me if you know what is wrong. Thank you.
MM.h
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <dlfcn.h>
#include <omp.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include "mpi.h"
/* Flat offset of element (i, j): j*ld + i, i.e. column-major layout when i is
 * the row index and ld is the leading dimension (number of elements between
 * the starts of consecutive columns). */
#define IDX2C(i, j, ld) (((j)*(ld))+(i))
main.c
#include "MM.h"
/* Defined in cuda.c. Declared here so the call in main() is not an implicit
 * function declaration (invalid since C99). */
void assignDeviceToProcess();

/*
 * Program entry point: initialises MPI, lets every process pick its GPU via
 * assignDeviceToProcess(), then shuts MPI down again.
 *
 * Returns 0 on normal completion.
 */
int main(int argc, char **argv)
{
    int rank;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    assignDeviceToProcess();

    MPI_Finalize();
    return 0;
}
cuda.c
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <stddef.h>
#include <string.h>
#include <dlfcn.h>
#include <ctype.h>
#include <mpi.h>
#include "cuda_runtime.h"
#include "cublas.h"
/* Minimum / maximum of two values. Each argument can be evaluated twice, so
 * do not pass expressions with side effects (e.g. imin(i++, j)). */
#define imin(a,B) (((a)<(B))?(a):(B))
#define imax(a,B) (((a)<(B))?(B):(a))
#include <time.h>
#include <sys/types.h>
#include <sys/times.h>
#include <sys/time.h>
/* Device pointer to the scratch buffer; set by the cudaMalloc call in
 * assignDeviceToProcess(). */
double *dev_scratch;
/* Non-zero until assignDeviceToProcess() has run once; makes later calls
 * no-ops. */
static int first_time=1;
/* Rank of this process inside the communicator produced by MPI_Comm_split in
 * assignDeviceToProcess(); also passed to cudaSetDevice as the device index. */
static int myrank=0;
/*
 * qsort()-compatible comparator for NUL-terminated strings.
 *
 * a, B: pointers to the first character of the two strings to compare.
 * Returns a value <0, 0 or >0 with the same meaning as strcmp().
 */
int stringCmp( const void *a, const void *B)
{
    const char *lhs = a;
    const char *rhs = B;

    return strcmp(lhs, rhs);
}
void assignDeviceToProcess()
{
char host_name[MPI_MAX_PROCESSOR_NAME];
char (*host_names)[MPI_MAX_PROCESSOR_NAME];
int n, namelen, color, rank, nprocs;
size_t bytes;
MPI_Comm nodeComm;
int dev, err1;
struct cudaDeviceProp deviceProp;
/* Check if the device has been alreasy assigned */
if(first_time)
{
first_time=0;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
MPI_Get_processor_name(host_name,&namelen);
bytes = nprocs * sizeof(char[MPI_MAX_PROCESSOR_NAME]);
host_names = (char (*)[MPI_MAX_PROCESSOR_NAME]) malloc(bytes);
strcpy(host_names[rank], host_name);
for (n=0; n<nprocs; n++)
{
MPI_Bcast(&(host_names[n]),MPI_MAX_PROCESSOR_NAME, MPI_CHAR, n, MPI_COMM_WORLD);
}
qsort(host_names, nprocs, sizeof(char[MPI_MAX_PROCESSOR_NAME]), stringCmp);
color = 0;
for (n=1; n<nprocs; n++)
{
if(strcmp(host_names[n-1], host_names[n])) color++;
if(strcmp(host_name, host_names[n]) == 0) break;
}
//printf ("node %s color %d \n",host_name, color );
MPI_Comm_split(MPI_COMM_WORLD, color, 0, &nodeComm);
MPI_Comm_rank(nodeComm, &myrank);
printf ("Assigning device %d to process on node %s rank %d \n",myrank, host_name, rank );
/* Assign device to MPI process, initialize BLAS and probe device properties */
cudaSetDevice(myrank);
cublasInit();
/* allocate scratch space for library on device */
err1=cudaMalloc ((void**)&dev_scratch, 3.6*134217728 * sizeof(dev_scratch[0]));
if (err1 ) printf ("Error allocating scratch space %f on %s rank %d device %d\n",3.6*134217728, host_name, rank, myrank);
}
}
makefile
# Builds and runs the MPI + CUDA/CUBLAS test program.
# Recipe lines must start with a TAB character.
DEST=exe
CC=mpicc
RUN=mpirun
#INCdirCUDA=/home/test/dgemm_test/cuda /usr/local/cuda/include
INCdirCUDA=/usr/local/cuda/include
INCdirMKL=/opt/intel/Compiler/11.1/056/mkl/include
INCdirATLAS=/opt/atlas/include
LIBdir=/usr/local/cuda/lib64
# cuda.c calls the CUDA runtime directly (cudaSetDevice, cudaMalloc), so link
# libcudart explicitly instead of relying on libcublas pulling it in.
Lflag= -lcublas -lcudart

# run and clean are commands, not files.
.PHONY: run clean

$(DEST): main.o cuda.o
	$(CC) -o $(DEST) main.o cuda.o libgoto2_nehalemp-r1.06.a -L$(LIBdir) $(Lflag)
main.o: main.c
	$(CC) main.c -c -I$(INCdirCUDA) -I$(INCdirMKL) -I$(INCdirATLAS)
cuda.o: cuda.c
	$(CC) cuda.c -c -I$(INCdirCUDA) -I$(INCdirMKL) -I$(INCdirATLAS)
# Rebuild the binary first if it is out of date.
run: $(DEST)
	$(RUN) -hostfile hostfile $(DEST)
# -f: do not fail when there is nothing to remove.
clean:
	rm -f *.o $(DEST)
hostfile
ypc0900 slots=1
ypc0901 slots=1
After compiling, when I execute it, I get the following error:
t@ypc0900 init]$ make run
mpirun -hostfile hostfile exe
Assigning device 0 to process on node ypc0900.comp.is.uec.ac.jp rank 0
Assigning device 1 to process on node ypc0901.comp.is.uec.ac.jp rank 1
[ypc0900:14863] *** Process received signal ***
[ypc0900:14863] Signal: Segmentation fault (11)
[ypc0900:14863] Signal code: Address not mapped (1)
[ypc0900:14863] Failing at address: (nil)
[ypc0901:13503] *** Process received signal ***
[ypc0901:13503] Signal: Segmentation fault (11)
[ypc0901:13503] Signal code: Address not mapped (1)
[ypc0901:13503] Failing at address: (nil)
[ypc0900:14863] [ 0] /lib64/libpthread.so.0 [0x3452c0e4c0]
[ypc0900:14863] [ 1] /usr/lib64/libcuda.so [0x2b2d31719980]
[ypc0900:14863] [ 2] /usr/lib64/libcuda.so [0x2b2d3171f3c4]
[ypc0900:14863] [ 3] /usr/lib64/libcuda.so [0x2b2d316ef557]
[ypc0900:14863] [ 4] /usr/lib64/libcuda.so [0x2b2d3169acf7]
[ypc0900:14863] [ 5] /usr/lib64/libcuda.so [0x2b2d316ac52b]
[ypc0900:14863] [ 6] /usr/lib64/libcuda.so [0x2b2d31691940]
[ypc0900:14863] [ 7] /usr/lib64/libcuda.so [0x2b2d3168aa8a]
[ypc0900:14863] [ 8] /usr/lib64/libcuda.so(cuCtxCreate+0x57) [0x2b2d316e5187]
[ypc0900:14863] [ 9] /usr/local/cuda/lib64/libcudart.so.2 [0x2b2d2c744aa2]
[ypc0900:14863] [10] /usr/local/cuda/lib64/libcudart.so.2 [0x2b2d2c74528c]
[ypc0900:14863] [11] /usr/local/cuda/lib64/libcudart.so.2(cudaFree+0x2d) [0x2b2d2c7283dd]
[ypc0900:14863] [12] /usr/local/cuda/lib64/libcublas.so.2(cublasInitCtx+0x30) [0x2b2d2ae30110]
[ypc0900:14863] [13] /usr/local/cuda/lib64/libcublas.so.2 [0x2b2d2ae7a1f7]
[ypc0900:14863] [14] /usr/local/cuda/lib64/libcublas.so.2(cublasInit+0x50) [0x2b2d2ae302b0]
[ypc0900:14863] [15] exe(assignDeviceToProcess+0x1e6) [0x402276]
[ypc0900:14863] [16] exe(main+0x57) [0x402047]
[ypc0900:14863] [17] /lib64/libc.so.6(__libc_start_main+0xf4) [0x345201d974]
[ypc0900:14863] [18] exe [0x401f39]
[ypc0900:14863] *** End of error message ***
[ypc0901:13503] [ 0] /lib64/libpthread.so.0 [0x380240e4c0]
[ypc0901:13503] [ 1] /usr/lib64/libcuda.so [0x2b151cb48980]
[ypc0901:13503] [ 2] /usr/lib64/libcuda.so [0x2b151cb4e3c4]
[ypc0901:13503] [ 3] /usr/lib64/libcuda.so [0x2b151cb1e557]
[ypc0901:13503] [ 4] /usr/lib64/libcuda.so [0x2b151cac9cf7]
[ypc0901:13503] [ 5] /usr/lib64/libcuda.so [0x2b151cadb52b]
[ypc0901:13503] [ 6] /usr/lib64/libcuda.so [0x2b151cac0940]
[ypc0901:13503] [ 7] /usr/lib64/libcuda.so [0x2b151cab9a8a]
[ypc0901:13503] [ 8] /usr/lib64/libcuda.so(cuCtxCreate+0x57) [0x2b151cb14187]
[ypc0901:13503] [ 9] /usr/local/cuda/lib64/libcudart.so.2 [0x2b1517968aa2]
[ypc0901:13503] [10] /usr/local/cuda/lib64/libcudart.so.2 [0x2b151796928c]
[ypc0901:13503] [11] /usr/local/cuda/lib64/libcudart.so.2(cudaFree+0x2d) [0x2b151794c3dd]
[ypc0901:13503] [12] /usr/local/cuda/lib64/libcublas.so.2(cublasInitCtx+0x30) [0x2b1516054110]
[ypc0901:13503] [13] /usr/local/cuda/lib64/libcublas.so.2 [0x2b151609e1f7]
[ypc0901:13503] [14] /usr/local/cuda/lib64/libcublas.so.2(cublasInit+0x50) [0x2b15160542b0]
[ypc0901:13503] [15] exe(assignDeviceToProcess+0x1e6) [0x402276]
[ypc0901:13503] [16] exe(main+0x57) [0x402047]
[ypc0901:13503] [17] /lib64/libc.so.6(__libc_start_main+0xf4) [0x380181d974]
[ypc0901:13503] [18] exe [0x401f39]
[ypc0901:13503] *** End of error message ***
mpirun noticed that process rank 1 with PID 13503 on node ypc0901 exited on signal 11 (Segmentation fault).
make: *** [run] エラー 139
Thank you for your patience.
init.rar (676 KB)