Problem with call to scalapack PDGESV from MPI fortran program for large matrices

I am observing a curious error while calling scalapack routine PDGESV from an MPI fortran program to solve a set of linear simultaneous equations Ax = b, in which A is an NxN matrix, b an N-element column vector and x the N-element solution vector. I have prepared a small test program (which I have pasted in below) which reproduces the problem. The program 1) allocates space for the matrix A and vector b, 2) fills their entries with random entries 3) calls PDGESV and then 4) deallocates the memory. The program takes 4 command line arguments: N , NPROW, NPCOL, MB, where NPROW , NPCOL determine the dimensions of the blacs process array and the blocksize MB x MB is used. The program successfully completes for all values of N that are small enough, and for all selected BLACS processor arrays / block sizes for these small N cases that I have tried. I have not precisely determined the point at which failure first occurs, but have only observed success for values of N up to N = 50000, but failure for the case N=94423 (with NPROW=NPCOL = 2, MB = 32). The failure (of segmentation fault type) occurs during the call to PDGESV, and the code never returns from the call to PDGESV. Note that failure does not, in particular, occur during the allocation of memory for the matrices / initialisation of the matrices. Indeed the cause of the problem is unlikely to be lack of memory, since for the N=94423 case the required memory is around 68GB, whereas 190 GB is available.

These results suggest to me that perhaps(?) there is nothing wrong with the fortran code itself, but maybe the linking / compiling commands need to be revised for large values of N ? I compile and link in one step with :

mpifort -O -o parat.exe solve_by_lu_parallelmpi_simple_light2.for -mp -tp=haswell -mcmodel=medium -Mscalapack

and can run with
mpirun -n 4 ./parat.exe 9442 2 2 32 > DUMP01 [this will be successful]
mpirun -n 4 ./parat.exe 94423 2 2 32 > DUMP02 [this will fail]

I am running the program on a linux cluster using Red Hat Enterprise Linux Server release 7.3 (Maipo),
compiling using PGI compiler version 17.7.

Any help / advice will really be greatly appreciated.
Many thanks, Dan.

Fortran Program:

      PROGRAM SOLVE_LU
      USE MPI
      IMPLICIT NONE
      INTEGER :: N
      DOUBLE PRECISION, ALLOCATABLE, DIMENSION(:,:) :: LOCAL_A
      DOUBLE PRECISION, ALLOCATABLE, DIMENSION(:) :: LOCAL_B 
      INTEGER :: ISTATUS
      INTEGER  :: INFO,  NRHS, IA, JA, IB, JB
      INTEGER, ALLOCATABLE, DIMENSION (:) :: IPIV
      INTEGER :: IARGC, N_COMMAND_ARG
      CHARACTER :: ARGV*10 
      INTEGER  :: NPROW, NPCOL, ICTXT,MYROW, MYCOL, MB, NB, MLOC, NLOC
      INTEGER :: IDESCA(9), IDESCB(9)
      INTEGER :: IERR
      INTEGER :: NUMROC
      INTEGER :: ISEEDSIZE
      INTEGER, ALLOCATABLE, DIMENSION ( :) :: SEED

      N_COMMAND_ARG = iargc()
      IF (N_COMMAND_ARG == 2) THEN
          WRITE(*,*) 'ILLEGAL NO. OF COMMAND LINE PARAMETERS'
          STOP
      ENDIF
      IF (N_COMMAND_ARG .GE. 1)THEN
          CALL GETARG(1,argv)
          READ (ARGV,'(I10)') N
      ELSE
          N = 100
      ENDIF


      IF (N_COMMAND_ARG .GE. 3)THEN
          CALL GETARG(2,argv)
          READ (ARGV,'(I10)') NPROW
          CALL GETARG(3,argv)
          READ (ARGV,'(I10)') NPCOL

      ELSE
          NPROW = 2
          NPCOL = 2
      ENDIF 

      IF (N_COMMAND_ARG .GE. 4)THEN
          CALL GETARG(4,argv)
          READ (ARGV,'(I10)') MB
      ELSE
          MB = 8
      ENDIF
      NB = MB
C     INITIALISE BLACS
      CALL SL_INIT(ICTXT, NPROW, NPCOL)
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )

      MLOC = NUMROC(N, MB, MYROW, 0, NPROW)
      NLOC = NUMROC(N, NB, MYCOL, 0, NPCOL)
 
      IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 )WRITE(*,*)
     @       'WE ARE SOLVING A SYSTEM OF ', N, ' LINEAR EQUATIONS'
      WRITE(*,*) 'PROC: ',MYROW, MYCOL,'HAS  MLOC, NLOC =', MLOC,NLOC

C     ALLOCATE SPACE  
      WRITE(*,*) 'PROC: ',MYROW, MYCOL,' ALLOCATING SPACE ...'
      ALLOCATE ( LOCAL_A(MLOC,NLOC), STAT = ISTATUS )
      IF(ISTATUS .NE. 0) THEN
          WRITE(*,*)'UNABLE TO ALLOCATE LOCAL_A, PROCESS: ',MYROW,MYCOL
          STOP
      ENDIF

      ALLOCATE ( LOCAL_B(MLOC), STAT = ISTATUS )
      IF (ISTATUS /= 0) THEN
          WRITE(*,*)
     @ ' FAILED TO ALLOCATE SPACE FOR LOCAL_B, PROCESS: ',MYROW,MYCOL
          STOP
      ENDIF

      CALL DESCINIT (IDESCA, N, N, MB, NB, 0, 0,
     @               ICTXT, MLOC, IERR)
      CALL DESCINIT (IDESCB, N, 1, MB, 1, 0, 0, ICTXT, MLOC, IERR)  
 
C      FILL ENTRIES OF MATRIX A AND R.H.S. VECTOR B WITH RANDOM ENTRIES      
      WRITE(*,*)'PROC: ',MYROW, MYCOL,
     @        ' CONSTRUCTING MATRIX A AND RHS VECTOR B ...'
      CALL RANDOM_SEED
      CALL RANDOM_SEED ( SIZE = ISEEDSIZE ) 
      ALLOCATE ( SEED(1:ISEEDSIZE) )
      CALL RANDOM_SEED ( GET = SEED )
      SEED(1) = SEED(1) + NPCOL*MYROW + MYCOL  
      CALL RANDOM_SEED ( PUT = SEED )  
      CALL RANDOM_NUMBER(LOCAL_B)       
      CALL RANDOM_NUMBER(LOCAL_A)
C      CALL LAPACK LU SOLVER PDGESV
      WRITE(*,*)'PROC: ',MYROW, MYCOL,
     @    'NOW SOLVING SYSTEM AX = B USING SCALAPACK PDGESV ..'
      ALLOCATE ( IPIV(MLOC + MB), STAT=ISTATUS )
      IF(ISTATUS /= 0) THEN
          WRITE(*,*)'UNABLE TO ALLOCATE IPIV, PROCESS: ',MYROW,MYCOL
          STOP
      ENDIF
      IA = 1
      JA = 1
      IB = 1
      JB = 1
      NRHS = 1
      INFO = 0
      CALL PDGESV(N, NRHS, LOCAL_A, IA, JA, IDESCA, IPIV, 
     @            LOCAL_B, IB, JB, IDESCB, INFO )
      IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 ) THEN
          WRITE(*,*)
          WRITE(*,*) 'INFO code returned by PDGESV = ', INFO
          WRITE(*,*)
      END IF 
C     DEALLOCATE MEMORY
      DEALLOCATE(LOCAL_A, STAT=ISTATUS)
      IF(ISTATUS /= 0) THEN
          WRITE(*,*)'UNABLE TO DEALLOCATE ' 
          STOP
      ENDIF   
      DEALLOCATE(LOCAL_B, STAT=ISTATUS)
      IF(ISTATUS /= 0) THEN
          WRITE(*,*)'UNABLE TO DEALLOCATE ' 
          STOP
      ENDIF   
      DEALLOCATE(IPIV, STAT=ISTATUS)
      IF(ISTATUS /= 0) THEN
          WRITE(*,*)'UNABLE TO DEALLOCATE ' 
          STOP
      ENDIF    
c     ===================================================
c     RELEASE BLACS CONTEXT
      CALL BLACS_GRIDEXIT(ictxt)
      CALL BLACS_EXIT(0)
      END PROGRAM SOLVE_LU