Hi,
I am porting a Fortran application to NVIDIA GPU. Currently, I choose CUDA Fortran to do the job.
The code compile/link/run successfully with no errors/warnings with PGI-16.10ce and PGI-17.4ce compiler for cuda8.0 and cc60.
However with PGI-17.10ce on a supercomputer, there is an Internal compiler error when compiling one file.
Detailed Software information:
pgfortran community edition 17.4-0 64-bit target on x86-64 Linux -tp piledriver on my desktop(Ubuntu 16.04.2 LTS) :
everything is fine and the compile/link/run processes produce no errors.
pgfortran community edition 17.10-0 64-bit target on x86-64 Linux -tp sandybridge on a supercomputer(CentOS release 6.9 (Final)) :
when compiling file “parallel_kernel_mod.cuf”, it says:
“PGF90-F-0000-Internal compiler error. cf_data_init: unexpected datatype 673 (parallel_kernel_mod.cuf: 191)”
But Line 191 is END SUBROUTINE statement.
The content of parallel_kernel_mod.cuf shows as below:
MODULE PARALLEL_KERNEL_MOD
CONTAINS
attributes(global) SUBROUTINE PARALLEL_KERNEL(maxpar,zsg_size,metz_copy_size,ngrd,nsort,pgrd,dt,ktime,xpos,ypos,zpos,back,jet,zsg,nlvl,zmdl,meto_copy,tratio,ubar_copy,metz_copy,cdep,rdep,&
ifhr_copy,ichem,kgrid,zsfc_copy,umax_copy,page,hdwpx2_copy,kret_copy,kt1_copy,k1_copy,k2_copy,xx_copy,yy_copy,zz_copy,jtime_copy,&
lx1_copy,ly1_copy,nxs_copy,nys_copy,mtime_copy,fhour_copy,grid_global_copy,metval_copy,metval_index,KPM,iteration_cycled)
USE DEV_COMMON ! Common region variables on device
USE CXY2LL_DEV_MOD
IMPLICIT NONE
INCLUDE 'DEFMETCPYDEV.INC'
INCLUDE 'DEFMETO.INC'
INCLUDE 'KENELINC.INC'
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!-------------------------------------------------------------------------------
! argument list definitions
!-------------------------------------------------------------------------------
INTEGER, value :: maxpar
INTEGER, value :: zsg_size
INTEGER, value :: metz_copy_size
INTEGER, value :: ngrd
INTEGER, INTENT(IN) :: nsort(maxpar)
INTEGER, INTENT(INOUT) :: pgrd (maxpar)
REAL , value :: dt
INTEGER, INTENT(INOUT) :: ktime
REAL, INTENT(INOUT) :: xpos (maxpar)
REAL, INTENT(INOUT) :: ypos (maxpar)
REAL, INTENT(INOUT) :: zpos (maxpar)
LOGICAL, value :: back
INTEGER, value :: jet
REAL, INTENT(IN) :: zsg(zsg_size)
INTEGER, value :: nlvl
REAL, value :: zmdl
TYPE(aset),INTENT(INOUT) :: meto_copy(maxpar)
REAL , value :: tratio
REAL , INTENT(INOUT) :: ubar_copy(maxpar)
TYPE(bset), INTENT(INOUT):: metz_copy (metz_copy_size,maxpar)
LOGICAL, value :: cdep
LOGICAL, value :: rdep
INTEGER, INTENT(INOUT) :: ifhr_copy(maxpar)
INTEGER, value :: ichem
INTEGER, INTENT(INOUT) :: kgrid
REAL, INTENT(INOUT) :: zsfc_copy(maxpar)
REAL, INTENT(INOUT) :: umax_copy(maxpar)
INTEGER, INTENT(INOUT) :: page (maxpar)
INTEGER ,INTENT(IN):: hdwpx2_copy(maxpar)
INTEGER ,INTENT(INOUT):: kret_copy(maxpar)
INTEGER ,INTENT(IN):: kt1_copy(maxpar)
INTEGER ,INTENT(IN):: k1_copy(maxpar)
INTEGER ,INTENT(IN):: k2_copy(maxpar)
REAL ,INTENT(INOUT):: xx_copy(maxpar)
REAL ,INTENT(INOUT):: yy_copy(maxpar)
REAL ,INTENT(INOUT):: zz_copy(maxpar)
INTEGER ,INTENT(OUT):: jtime_copy(maxpar)
INTEGER, INTENT(IN) :: lx1_copy(ngrd,maxpar)
INTEGER, INTENT(IN) :: ly1_copy(ngrd,maxpar)
INTEGER, INTENT(IN) :: nxs_copy(ngrd,maxpar)
INTEGER, INTENT(IN) :: nys_copy(ngrd,maxpar)
INTEGER, INTENT(IN) :: mtime_copy(2,maxpar)
INTEGER, INTENT(IN) :: fhour_copy(2,ngrd,maxpar)
LOGICAL, INTENT(IN) :: grid_global_copy(0:mgrd,0:mtim,maxpar)
TYPE(METVAL_COPY_DEV_TYPE), INTENT(IN) :: metval_copy(maxpar)
INTEGER , INTENT(IN) :: metval_index(maxpar)
INTEGER, value :: kpm
LOGICAL, INTENT(IN) :: iteration_cycled (maxpar)
!!!!local variables in cuda kernel!!!!!!
INTEGER :: kpt
INTEGER :: kp
INTEGER :: METZ_SIZE
! Each GPU thread doing just one iteration,map threadid to iteration ID
KPT=threadIdx%x+(blockIdx%x-1)*blockDim%x
IF(KPT .LE. KPM) THEN
KP=NSORT(KPT)
IF(iteration_cycled(KP)) THEN
RETURN
END IF
IF(hdwpx2_copy(KP).LE.4)THEN
! standard 3D atmpospheric advection
CALL ADVIEC_DEV(metval_copy(metval_index(KP))%u,metval_copy(metval_index(KP))%v,metval_copy(metval_index(KP))%w, &
k1_copy(KP),k2_copy(KP),NLVL,mtime_copy(:,KP),JET,ZMDL,xx_copy(KP),yy_copy(KP),zz_copy(KP),METO_COPY(KP)%ZNDX,DT,TRATIO, &
DREC(PGRD(KP),kt1_copy(KP))%TAVRG,BACK, &
grid_global_copy(PGRD(KP),kt1_copy(KP),KP),GRID(PGRD(KP),kt1_copy(KP))%NX,GRID(PGRD(KP),kt1_copy(KP))%NY,PGRD,KP)
ELSEIF(hdwpx2_copy(KP).EQ.5)THEN
! test for special surface advecting particles
CALL ADVSFC_DEV(metval_copy(metval_index(KP))%UF,metval_copy(metval_index(KP))%VF, &
k1_copy(KP),k2_copy(KP),NLVL,mtime_copy(:,KP),JET,xx_copy(KP),yy_copy(KP),DT,DREC(PGRD(KP),kt1_copy(KP))%TAVRG,BACK, &
grid_global_copy(PGRD(KP),kt1_copy(KP),KP),GRID(PGRD(KP),kt1_copy(KP))%NX,GRID(PGRD(KP),kt1_copy(KP))%NY,PGRD,KP)
ELSEIF(hdwpx2_copy(KP).EQ.6)THEN
! lagrangian isobaric sampling
CALL ADVISO_DEV(metval_copy(metval_index(KP))%u,metval_copy(metval_index(KP))%v,metval_copy(metval_index(KP))%p,ZSG, &
k1_copy(KP),k2_copy(KP),NLVL,mtime_copy(:,KP),JET,ZMDL,xx_copy(KP),yy_copy(KP),zz_copy(KP),METO_COPY(KP)%ZNDX,DT,TRATIO, &
DREC(PGRD(KP),kt1_copy(KP))%TAVRG,BACK, &
grid_global_copy(PGRD(KP),kt1_copy(KP),KP),GRID(PGRD(KP),kt1_copy(KP))%NX,GRID(PGRD(KP),kt1_copy(KP))%NY, &
METO_COPY(KP)%UVEL,METO_COPY(KP)%VVEL,PGRD,KP)
ELSE
PGRD(KP)=0
kret_copy(KP)=1
STOP
END IF
! save advection distance as a wind speed (grid pts / min)
ubar_copy(KP) = MAX(ABS(xx_copy(KP)-(XPOS(KP)-lx1_copy(PGRD(KP),KP)+1)),ABS(yy_copy(KP)-(YPOS(KP)-ly1_copy(PGRD(KP),KP)+1)))/ABS(DT)
! check for cyclic boundary conditions
kret_copy(KP)=0
IF(grid_global_copy(PGRD(KP),kt1_copy(KP),KP))THEN
IF(xx_copy(KP).GE.FLOAT(GRID(PGRD(KP),kt1_copy(KP))%NX+1)) xx_copy(KP)=xx_copy(KP)-FLOAT(GRID(PGRD(KP),kt1_copy(KP))%NX)
IF(xx_copy(KP).LT.1.0) xx_copy(KP)=GRID(PGRD(KP),kt1_copy(KP))%NX+xx_copy(KP)
IF(yy_copy(KP).GT.FLOAT(GRID(PGRD(KP),kt1_copy(KP))%NY)) yy_copy(KP)=2.0*GRID(PGRD(KP),kt1_copy(KP))%NY-yy_copy(KP)
IF(yy_copy(KP).LT.1.0) yy_copy(KP)=2.0-yy_copy(KP)
ELSE
! This condition should never occur after the advection step because
! limits have been set to switch particles when too close to edge of
! subgrid prior to calling the advection step. However, some runs
! may have problems if the time step is to large or winds too strong.
IF(xx_copy(KP).LT.1.0.OR.yy_copy(KP).LT.1.0.OR.xx_copy(KP).GT.FLOAT(nxs_copy(PGRD(KP),KP)).OR. &
yy_copy(KP).GT.FLOAT(nys_copy(PGRD(KP),KP))) kret_copy(KP)=1
END IF
! map position back to meteo grid
XPOS(KP)=xx_copy(KP)+lx1_copy(PGRD(KP),KP)-1
YPOS(KP)=yy_copy(KP)+ly1_copy(PGRD(KP),KP)-1
ZPOS(KP)=MIN(1.0,zz_copy(KP))
IF(kret_copy(KP).EQ.1)THEN
!WRITE(*,*)"particle off-grid after advection step!This condition should never occur.Consider reducing time step or increasing subgrid size!"
STOP
END IF
IF(GRID(PGRD(KP),kt1_copy(KP))%LATLON)THEN
CALL GBL2LL_DEV(PGRD(KP),kt1_copy(KP),XPOS(KP),YPOS(KP),METO_COPY(KP)%PLAT,METO_COPY(KP)%PLON)
ELSE
CALL CXY2LL_DEV(GRID(PGRD(KP),kt1_copy(KP))%GBASE,XPOS(KP),YPOS(KP),METO_COPY(KP)%PLAT,METO_COPY(KP)%PLON)
END IF
jtime_copy(KP)=JET+NINT(DT)
METZ_SIZE=SIZE(metz_copy,1)
CALL ADVMET_DEV(metz_copy(:,KP),METO_COPY(KP),BACK,.TRUE.,CDEP,RDEP,.FALSE.,xx_copy(KP),yy_copy(KP),jtime_copy(KP),mtime_copy(:,KP), &
DREC(PGRD(KP),kt1_copy(KP))%TAVRG, &
DREC(PGRD(KP),kt1_copy(KP))%ACYCLE,NLVL,fhour_copy(:,PGRD(KP),KP),ifhr_copy(KP),k1_copy(KP),k2_copy(KP),metval_copy(metval_index(KP))%GX, &
metval_copy(metval_index(KP))%GY,metval_copy(metval_index(KP))%Z0,metval_copy(metval_index(KP))%LU,metval_copy(metval_index(KP))%ZT, &
metval_copy(metval_index(KP))%a,metval_copy(metval_index(KP))%t,metval_copy(metval_index(KP))%q,metval_copy(metval_index(KP))%p, &
metval_copy(metval_index(KP))%e,metval_copy(metval_index(KP))%x,metval_copy(metval_index(KP))%ZI, metval_copy(metval_index(KP))%h, &
metval_copy(metval_index(KP))%U0,metval_copy(metval_index(KP))%V0,metval_copy(metval_index(KP))%RT,metval_copy(metval_index(KP))%CF, &
metval_copy(metval_index(KP))%UF,metval_copy(metval_index(KP))%VF,metval_copy(metval_index(KP))%TL, &
metval_copy(metval_index(KP))%SF, metval_copy(metval_index(KP))%SS,metval_copy(metval_index(KP))%DS,DREC(PGRD(KP),kt1_copy(KP))%DSWF, &
grid_global_copy(PGRD(KP),kt1_copy(KP),KP),GRID(PGRD(KP),kt1_copy(KP))%NX,GRID(PGRD(KP),kt1_copy(KP))%NY, &
metval_copy(metval_index(KP))%T0,metval_copy(metval_index(KP))%p0,metval_copy(metval_index(KP))%H0,metval_copy(metval_index(KP))%CP,metval_copy(metval_index(KP))%SRT, &
ICHEM,PGRD,KP,METZ_SIZE)
! test if particle above the model top
IF(.FALSE..AND.ZPOS(KP).LT.ZSG(NLVL))THEN
! terminate trajectories
PGRD(KP)=0
kret_copy(KP)=1
STOP
ELSE
! maintain particles (full reflection turbulence assumed)
ZPOS(KP)=MAX(ZSG(NLVL),ZPOS(KP))
END IF
kret_copy(KP)=0
!-------------------------------------------------------------------------------
! inlined advpnt subroutine END
!-------------------------------------------------------------------------------
! IF (kret_copy(KP).NE.0) STOP
! convert to (km/min) = (gp/min) (km/gp)
ubar_copy(KP)=ubar_copy(KP)*GRID(KGRID,KTIME)%SIZE
KGRID=PGRD(KP)
zsfc_copy(KP)=METO_COPY(KP)%ZTER
umax_copy(KP)=MAX(0.060, umax_copy(KP), ubar_copy(KP))
! increment particle age after advection ...
! additional tests required when puff dispersion shuts down because
! the age is set to negative as an indicator for no puff dispersion
! stilt mode (ichem.eq.8) used to sum time in boundary layer
IF(ICHEM.NE.8) PAGE(KP)=SIGN( (ABS(PAGE(KP))+NINT(ABS(DT))), PAGE(KP) )
END IF
END SUBROUTINE PARALLEL_KERNEL
END MODULE PARALLEL_KERNEL_MOD
By the way, the code compile successfully when using -Mcuda=emu flag on pgfortran 17.10ce.
Thanks for help!