mpif90+cudaC Mixed compilation problems

      program hello_world
      ! MPI driver: every rank fills two 2-element integer vectors, hands
      ! them to the external routine `add` together with a result buffer,
      ! and rank 0 prints the result buffer afterwards.
      implicit none
      include 'mpif.h'
      integer :: my_rank, n_ranks, istat
      integer, dimension(2) :: a, b, c

      ! Operands and zeroed result buffer.
      a = (/ 2, 2 /)
      b = (/ 1, 1 /)
      c = (/ 0, 0 /)

      ! Start MPI and query this process's rank and the communicator size.
      call MPI_INIT(istat)
      call MPI_COMM_RANK(MPI_COMM_WORLD, my_rank, istat)
      call MPI_COMM_SIZE(MPI_COMM_WORLD, n_ranks, istat)
      print *, 'myid=', my_rank, n_ranks

      ! Every rank calls the external routine; only rank 0 reports c.
      call add(a, b, c)
      if (my_rank == 0) print *, 'c=', c(1), c(2)

      call MPI_FINALIZE(istat)
      end program hello_world



#include<stdio.h>
#include<cuda_runtime.h>
/*
 * Element-wise integer addition kernel: c_device[i] = a_device[i] + b_device[i]
 * for the first two elements only (the length 2 is hard-coded).
 * Uses a 1D launch layout; threads whose flat index is 2 or more do nothing.
 */
extern "C" __global__ void add1(int *a_device,int *b_device,int *c_device)
{
  /* Flat global thread index for a 1D grid of 1D blocks. */
  const int idx = blockIdx.x * blockDim.x + threadIdx.x;

  /* Guard: only indices 0 and 1 map to valid elements. */
  if (idx >= 2) {
    return;
  }

  c_device[idx] = a_device[idx] + b_device[idx];
}
/* Prints a diagnostic for a failed CUDA runtime call; returns true on failure. */
static bool add_cuda_failed(cudaError_t status, const char *what)
{
  if (status == cudaSuccess) {
    return false;
  }
  fprintf(stderr, "CUDA: %s failed: %s\n", what, cudaGetErrorString(status));
  return true;
}

/*
 * Host wrapper that computes c[i] = a[i] + b[i] for i = 0,1 on the GPU by
 * launching add1 with one block of two threads, and prints the elapsed time
 * (host-to-device copies + kernel + device-to-host copy) measured with CUDA
 * events.
 *
 * The trailing underscore and pointer-only arguments are intended to let a
 * Fortran caller invoke this as `call add(a,b,c)`.
 *
 * Parameters:
 *   a, b  host arrays of 2 ints each (inputs, not modified)
 *   c     host array of 2 ints (output)
 *
 * On any CUDA failure a message is written to stderr, the timing line is not
 * printed, and every device resource acquired so far is released.
 */
extern "C" void add_(int *a ,int *b,int *c)
{
  const size_t bytes = 2*sizeof(int);
  int *a_device = NULL, *b_device = NULL, *c_device = NULL;
  cudaEvent_t start = NULL, stop = NULL;
  float mstimer = 0.0f;

  /* Single-pass block: `break` jumps to the cleanup section below. */
  do {
    if (add_cuda_failed(cudaMalloc(&a_device, bytes), "cudaMalloc(a)")) break;
    if (add_cuda_failed(cudaMalloc(&b_device, bytes), "cudaMalloc(b)")) break;
    if (add_cuda_failed(cudaMalloc(&c_device, bytes), "cudaMalloc(c)")) break;

    if (add_cuda_failed(cudaEventCreate(&start), "cudaEventCreate(start)")) break;
    if (add_cuda_failed(cudaEventCreate(&stop), "cudaEventCreate(stop)")) break;

    /* The start event must be recorded, otherwise the elapsed-time query
       below has nothing to measure from and fails. */
    if (add_cuda_failed(cudaEventRecord(start), "cudaEventRecord(start)")) break;

    /* Only the inputs need to travel to the device; c_device is fully
       overwritten by the kernel for both of its elements. */
    if (add_cuda_failed(cudaMemcpy(a_device,a,bytes,cudaMemcpyHostToDevice),
                        "cudaMemcpy(a -> device)")) break;
    if (add_cuda_failed(cudaMemcpy(b_device,b,bytes,cudaMemcpyHostToDevice),
                        "cudaMemcpy(b -> device)")) break;

    add1 <<<1,2>>> (a_device,b_device,c_device);
    /* Kernel launches return nothing; launch errors are fetched explicitly. */
    if (add_cuda_failed(cudaGetLastError(), "add1 launch")) break;

    /* Blocking copy: also waits for the kernel and surfaces execution errors.
       a and b are not copied back because the kernel never writes them. */
    if (add_cuda_failed(cudaMemcpy(c,c_device,bytes,cudaMemcpyDeviceToHost),
                        "cudaMemcpy(c -> host)")) break;

    if (add_cuda_failed(cudaEventRecord(stop), "cudaEventRecord(stop)")) break;
    if (add_cuda_failed(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)")) break;
    if (add_cuda_failed(cudaEventElapsedTime(&mstimer,start,stop),
                        "cudaEventElapsedTime")) break;

    /* cudaEventElapsedTime reports milliseconds; print seconds. */
    printf("CUDA: time= %g(s)\n",mstimer*1.e-3) ;
  } while (0);

  /* Cleanup runs on success and on every failure path. */
  if (start != NULL) {
    cudaEventDestroy(start);
  }
  if (stop != NULL) {
    cudaEventDestroy(stop);
  }
  /* cudaFree accepts a null pointer, so unallocated buffers are harmless. */
  cudaFree(a_device);
  cudaFree(b_device);
  cudaFree(c_device);
}

makefile:

# Builds test.x from an MPI Fortran driver (hello_world.f90) and a CUDA C
# translation unit (add.cu); the final link is done by the MPI Fortran wrapper.
# NOTE: run the executable with the mpirun/mpiexec that belongs to the same
# MPI installation as $(MPIF90), i.e. the one in $(MPI_INSTALL_PATH)/bin.
CUDA_INSTALL_PATH=/usr/local/cuda-10.1
MPI_INSTALL_PATH=/opt/mpich2-1.4.1p1
NVCC =$(CUDA_INSTALL_PATH)/bin/nvcc
MPIF90= $(MPI_INSTALL_PATH)/bin/mpif90
# Library search path first, then the libraries found through it.
LIBS =-L$(CUDA_INSTALL_PATH)/lib64 -lcudart -lcurand

FFILES=hello_world.f90
CUFILES=add.cu
OBJECTS=hello_world.o add.o
EXENAME=test.x

# Neither target names a real file.
.PHONY: all clean

all:
	$(MPIF90) -c $(FFILES)
	$(NVCC) -arch=sm_35 -c $(CUFILES)
	# Objects go before the libraries that resolve their symbols.
	$(MPIF90) -o $(EXENAME) $(OBJECTS) $(LIBS) -lstdc++
clean:
	rm -f *.o
	rm -f $(EXENAME)

Problem: The program compiles successfully, but when I run it, only the main process (rank 0) is working. I’d like to solve this problem…

Hi M_yeah,

Can you define what you mean by it doesn’t work except for the main process?

If I remove the check in hello_world.f90 to only print if it’s rank 0, then all ranks give the result of c=3. Not sure if this is the problem, i.e. the code only prints out rank 0 and why you think the other ranks aren’t working, or if you’re seeing something different.

% cat hello_world.f90
      program hello_world
      ! MPI driver: every rank fills two 2-element integer vectors, hands
      ! them to the external routine `add` together with a result buffer,
      ! and then every rank prints the result buffer.
      implicit none
      include 'mpif.h'
      integer :: my_rank, n_ranks, istat
      integer, dimension(2) :: a, b, c

      ! Operands and zeroed result buffer.
      a = (/ 2, 2 /)
      b = (/ 1, 1 /)
      c = (/ 0, 0 /)

      ! Start MPI and query this process's rank and the communicator size.
      call MPI_INIT(istat)
      call MPI_COMM_RANK(MPI_COMM_WORLD, my_rank, istat)
      call MPI_COMM_SIZE(MPI_COMM_WORLD, n_ranks, istat)
      print *, 'myid=', my_rank, n_ranks

      call add(a, b, c)
      ! The rank-0-only guard is disabled here so that all ranks report c:
      !if (my_rank == 0) then
      print *, 'c=', c(1), c(2)
      !endif

      call MPI_FINALIZE(istat)
      end program hello_world
% make
mpif90 -c hello_world.f90
nvcc -arch=sm_70 -c add.cu
mpif90 -o test.x -lcudart -lcurand -L/opt/cuda-10.1/lib64 -lstdc++ hello_world.o add.o
% mpirun -np 4 test.x
 myid=            0            4
 myid=            1            4
 myid=            2            4
 myid=            3            4
CUDA: time= 0(s)
 c=            3            3
CUDA: time= 0(s)
 c=            3            3
CUDA: time= 0(s)
 c=            3            3
CUDA: time= 0(s)
 c=            3            3

I found this problem:
$ which mpif90
~/PGI/linux86-64/19.4/mpi/openmpi-3.1.3/bin/mpif90

Hi M_yeah,

Sorry but I’m still not understanding what issue you’re encountering.

Though you should be consistent with which MPI install you’re using, and the MPI should be configured for use with the PGI compilers. In your makefile, you have it set up to use “/opt/mpich2-1.4.1p1/bin/mpif90”, which is different from what you show above in your PATH, i.e. the OpenMPI that ships with the PGI compilers.

Could the problem be that you’re using the mpirun from the PGI build of OpenMPI since it’s in your PATH, but built using your MPICH install?

This is just a guess so if incorrect, can you please provide more details?

-Mat

Yes, your guess is correct. I changed my PATH in “.bash_profile”:
export PATH=/opt/mpich2-1.4.1p1/bin:$PATH
The answer is right.