Hi,
one user reported a problem with large data fields in a simulation
using mpi.
The code runs with the old mpi1 from PGI, but not with mpi2 (much faster) or
openmpi (self-compiled). The error message is always:
Fatal error in PMPI_Gather: Other MPI error
Nothing else. He wrote a small sample program to reproduce the error.
Any idea why it is not working with mpi2 and/or openmpi?
Bye, Peer
------------------ more globe.f90
!> Minimal reproducer for "Fatal error in PMPI_Gather: Other MPI error".
!!
!! Every rank contributes one column slice of `vv_p` per call to
!! `mpi_gather`; rank 0 collects the slices into `vv_g` and then writes the
!! (unrelated, all-zero) array `out` to an unformatted file. The
!! allocate / gather / write / deallocate sequence is repeated in a loop.
!!
!! The allocation pattern, the array sizes and the declaration order are
!! deliberately left as they are: according to the notes below they are
!! part of what triggers the reported failure.
program globe
  use mpi
  implicit none

  integer :: mpinfo = 0    ! error code returned by each MPI call
  integer :: myworld = 0   ! communicator handle, set to MPI_COMM_WORLD below
  integer :: mypid = 0     ! rank of this process in myworld
  integer :: npro = 1      ! number of processes in myworld

  ! * The comments give some conditions required to reproduce the problem.
  ! * If the program runs at two hosts, the error message is shown two times
  integer, parameter :: vv_g_d1 = 2432
  integer, parameter :: vv_p_d1 = vv_g_d1 / 16 ! requires 16 CPUs
  integer, parameter :: out_d1 = 2418 ! requires >=2416 (vv_g_d1 - 16)
  ! d2 requires >=4282 @ ii=30 / >=6682 @ ii=20 (depends on number of
  ! loops, but this limit can change for unknown reason)
  integer, parameter :: d2 = 5001

  integer :: ii, jj
  real :: vv_p(vv_p_d1,d2)          ! per-rank send buffer
  real, allocatable :: vv_g(:,:)    ! gather target, reallocated every iteration
  ! * requires the definition of the variable for write to be defined below vv_g(:,:)
  real :: out(out_d1,d2)            ! data written to unit 11 by rank 0

  vv_p(:,:) = 0.0
  out(:,:) = 0.0

  call mpi_init(mpinfo)
  myworld = MPI_COMM_WORLD
  call mpi_comm_size(myworld, npro, mpinfo)

  ! * The problem requires 16 CPUs
  if (npro .ne. 16) then
    write(*,*) "Works only with 16 CPUs"
    ! Every rank sees the same npro and therefore takes this branch, so it
    ! is safe to shut MPI down cleanly here instead of stopping with MPI
    ! still initialized.
    call mpi_finalize(mpinfo)
    stop
  endif

  call mpi_comm_rank(myworld, mypid, mpinfo)

  ! Only rank 0 owns the output file.
  if (mypid == 0) then
    open(11, FILE='jedi_restart', STATUS='replace', FORM='unformatted')
  endif

  write(6,*) "test1", mypid
  flush(6)

  do ii = 1, 25 ! number of loops depends on field size
    allocate(vv_g(vv_g_d1,d2))

    ! One gather per column jj: each rank sends vv_p_d1 reals starting at
    ! vv_p(1,jj); the root (rank 0) receives vv_p_d1 reals per rank
    ! starting at vv_g(1,jj).
    do jj = 1, d2
      call mpi_gather(vv_p(1,jj), vv_p_d1, MPI_REAL, &
                      vv_g(1,jj), vv_p_d1, MPI_REAL, &
                      0, myworld, mpinfo)
    enddo

    ! The gathered data is not used; rank 0 writes `out` instead.
    if (mypid == 0) then
      write(11) out
      flush(11)
    endif

    deallocate(vv_g)
  enddo

  write(6,*) "test2", mypid
  flush(6)

  if (mypid == 0) close(11)

  call mpi_barrier(myworld, mpinfo)
  call mpi_finalize(mpinfo)
end program globe
--------------------------- end globe.f90
----------makefile --------
# Build the globe.f90 reproducer as the executable "JeDi" via the mpif90 wrapper.
# NOTE(review): the option spellings below look like Intel Fortran flags --
# confirm they match the compiler that mpif90 wraps on the build host.
OPTIONS=-assume byterecl -fpp -allow nofpp_comments -free
DEBUG=-g -d-lines -check -debug -debug-parameters -fpe0 -traceback

# "all" does not name a file; declare it phony so the recipe always runs.
.PHONY: all

# Recipe lines must begin with a TAB character, otherwise make stops with
# "missing separator".
# NOTE(review): SOURCE is not defined in this makefile; presumably it is
# supplied by the environment or on the make command line -- confirm. If it
# expands to nothing, the second recipe line starts with a bare ';'.
all:
	rm -f JeDi globe_mod.mod JeDi.out jedi_restart
	$(SOURCE) ; mpif90 $(OPTIONS) $(DEBUG) -o JeDi globe.f90
To test, start it like this:
mpirun -np 16 --host localhost ./JeDi