Hello,
when trying to compile my program I got the error:
NVFORTRAN-F-0000-Internal compiler error. stack error while lowering 7 (/home/ivan/lrz/rbfxlbm/examples/acc_lbm.F90: 521)
NVFORTRAN/x86-64 Linux 24.1-0: compilation aborted
The procedure at line 521 is part of a larger module. I isolated the procedure into a separate file:
subroutine d2q9_rr(n,ln,f,rho,uu,vv,omega)
! For each site i = 1..n: reads the nine values f(i,0:8), computes the
! density rho(i) and the two velocity components uu(i), vv(i), and then
! overwrites f(i,0:8) with an equilibrium part plus a non-equilibrium part
! scaled by (1 - omega).
! NOTE(review): the name suggests a D2Q9 lattice Boltzmann collision step;
! what "rr" stands for is not visible here -- confirm against the module
! this routine was extracted from.
!
! Arguments:
!   n      (in)    number of sites processed by the loop
!   ln     (in)    extent of the first dimension of f; presumably ln >= n,
!                  which is not checked here
!   f      (inout) nine values per site, second index running 0..8
!   rho    (out)   sum of the nine values of each site
!   uu, vv (out)   velocity components of each site
!   omega  (in)    enters only through the factor (1 - omega);
!                  presumably a relaxation rate -- confirm with caller
implicit none
! Working precision: the kind of a double precision literal.
integer, parameter :: wp = kind(1.0d0)
! Weights applied to index 0 (w0), indices 1-4 (ws) and indices 5-8 (wd).
real(wp), parameter :: w0 = 4.0_wp/9.0_wp
real(wp), parameter :: ws = 1.0_wp/9.0_wp
real(wp), parameter :: wd = 1.0_wp/36.0_wp
real(wp), parameter :: csqr = 1.0_wp / 3.0_wp
integer, intent(in) :: n, ln
real(wp), intent(inout) :: f(ln,0:8)
real(wp), intent(out) :: rho(n), uu(n), vv(n)
real(wp), intent(in) :: omega
real(wp) :: omega_w0, omega_ws, omega_wd
! Scalar copies of f(i,0:8); the suffixes follow compass directions, which
! matches the signs used in the ux/uy sums below.
real(wp) :: vC, vE, vN, vW, vS, vNE, vNW, vSW, vSE
real(wp) :: teq(0:8)
real(wp) :: ux, uy, uxx, uyy, uxxy, uyyx, uxxyy
real(wp) :: uxpy, uxmy, u3p, u3m
real(wp) :: indp0, indps, indpd, indp57, indp68
! NOTE(review): a3p and a3m are declared but never referenced in this routine.
real(wp) :: axx, axy, ayy, axxy, ayyx, axxyy, tmp, a3p, a3m
integer :: i
! Loop-invariant factors: each weight multiplied by (1 - omega).
omega_w0 = w0*(1.0_wp - omega)
omega_ws = ws*(1.0_wp - omega)
omega_wd = wd*(1.0_wp - omega)
! default(private) gives every thread its own copy of all locals not named
! in another clause (the scalars, teq and i). The three omega_* factors are
! firstprivate because they are set before the loop and only read inside it.
!$omp parallel do default(private) shared(n,ln,f,rho,uu,vv) &
!$omp firstprivate(omega_w0,omega_ws,omega_wd)
do i = 1, n
! pull pdfs: copy the nine values of site i into scalars
! (index 0 -> C, 1 -> E, 2 -> N, 3 -> W, 4 -> S, 5 -> NE, 6 -> NW,
!  7 -> SW, 8 -> SE)
vC = f(i,0)
! VE is the same variable as vE (Fortran names are case-insensitive).
VE = f(i,1)
vN = f(i,2)
vW = f(i,3)
vS = f(i,4)
vNE = f(i,5)
vNW = f(i,6)
vSW = f(i,7)
vSE = f(i,8)
!
! macroscopic values
!
! The parentheses fix the order in which the nine values are summed.
rho(i) = (((vNE + vSW) + (vNW + vSE)) + &
((vE + vW) + (vN + vS))) + vC
! NOTE(review): no guard against rho(i) == 0 in the two divisions below.
ux = (((vNE - vSW) + (vSE - vNW)) + (vE - vW)) / rho(i)
uy = (((vNE - vSW) + (vNW - vSE)) + (vN - vS)) / rho(i)
uu(i) = ux
vv(i) = uy
! Velocity products reused by all equilibrium terms.
uxx = ux*ux
uyy = uy*uy
uxxy = uxx*uy
uyyx = uyy*ux
uxxyy = uxx*uyy
! indp0 first holds the common part 1 - 1.5*(uxx + uyy); indps and indpd are
! derived from it before indp0 itself receives its own uxxyy correction.
indp0 = 1.0_wp - 1.5_wp * (uxx + uyy)
indps = indp0 - 4.5_wp*uxxyy
indpd = indp0 + 9.0_wp*uxxyy
indp0 = indp0 + 2.25_wp*uxxyy
! Intel Fortran directive, kept verbatim. According to the report that
! accompanies this code, nvfortran 24.1 with -mp=multicore aborts with an
! internal compiler error while this line is present and compiles once it
! is removed.
!DIR$ DISTRIBUTE POINT
!
! equilibrium parts, 0 - 4
!
teq(0) = w0*rho(i)*indp0
teq(1) = ws*rho(i)*(indps + 3.0_wp*ux + 4.5_wp*(uxx - uyyx))
teq(3) = ws*rho(i)*(indps - 3.0_wp*ux + 4.5_wp*(uxx + uyyx))
teq(2) = ws*rho(i)*(indps + 3.0_wp*uy + 4.5_wp*(uyy - uxxy))
teq(4) = ws*rho(i)*(indps - 3.0_wp*uy + 4.5_wp*(uyy + uxxy))
!
! non-equilibrium parts, 0 - 4
!
! From here on vC..vS hold value minus equilibrium, not the pulled values.
vC = vC - teq(0)
vE = vE - teq(1)
vN = vN - teq(2)
vW = vW - teq(3)
vS = vS - teq(4)
! First stage of axx/ayy, using indices 0-4 only; the contribution of
! indices 5-8 is added further down through tmp.
axx = csqr*(2*(vE + vW) - (vN + vS) - vC)
ayy = csqr*(2*(vN + vS) - (vE + vW) - vC)
!
! equilibrium parts, 5 - 8
!
! Indices 5 and 7 share the terms built from ux + uy.
u3p = uxxy + uyyx
uxpy = ux + uy
indp57 = indpd + 4.5_wp*uxpy*uxpy
teq(5) = wd*rho(i)*(indp57 + 3.0_wp*uxpy + 9.0_wp*u3p)
teq(7) = wd*rho(i)*(indp57 - 3.0_wp*uxpy - 9.0_wp*u3p)
! Indices 6 and 8 share the terms built from ux - uy.
u3m = uxxy - uyyx
uxmy = ux - uy
indp68 = indpd + 4.5_wp*uxmy*uxmy
teq(6) = wd*rho(i)*(indp68 - 3.0_wp*uxmy + 9.0_wp*u3m)
teq(8) = wd*rho(i)*(indp68 + 3.0_wp*uxmy - 9.0_wp*u3m)
! ----------------------
!
! non-equilibrium parts, 5 - 8
!
vNE = vNE - teq(5)
vNW = vNW - teq(6)
vSW = vSW - teq(7)
vSE = vSE - teq(8)
! Second stage: add the contribution of indices 5-8 to axx and ayy.
tmp = 2.0_wp*csqr*(vNE + vNW + vSW + vSE)
axx = axx + tmp
ayy = ayy + tmp
axy = ((vNE + vSW) - (vNW + vSE))
! Higher-order coefficients built from axx, axy, ayy and the velocity.
axxy = 2.0_wp*ux*axy + uy*axx
ayyx = 2.0_wp*uy*axy + ux*ayy
axxyy = 2.0_wp*(ux*ayyx + uy*axxy) - uxx*ayy - uyy*axx - 4.0_wp*ux*uy*axy
! indp0, indps and indpd are reused here for the non-equilibrium terms; their
! earlier equilibrium values are no longer needed once teq(0:8) is filled.
indp0 = -1.5_wp*(axx + ayy)
indps = indp0 - 4.5_wp*axxyy
indpd = 9.0_wp*axxyy - 2.0_wp*indp0
indp0 = indp0 + 2.25_wp*axxyy
! vC..vSE are overwritten once more, now with the unweighted terms that the
! final update multiplies by omega_w0 / omega_ws / omega_wd.
vC = indp0
vE = indps + 4.5_wp*(axx - ayyx)
vW = indps + 4.5_wp*(axx + ayyx)
vN = indps + 4.5_wp*(ayy - axxy)
vS = indps + 4.5_wp*(ayy + axxy)
vNE = indpd + 9.0_wp*(axxy + ayyx + axy)
vSW = indpd - 9.0_wp*(axxy + ayyx - axy)
vNW = indpd + 9.0_wp*(axxy - ayyx - axy)
vSE = indpd - 9.0_wp*(axxy - ayyx + axy)
! Write back: equilibrium part plus weight*(1 - omega) times the term above.
f(i,0) = teq(0) + omega_w0 * vC
f(i,1) = teq(1) + omega_ws * vE
f(i,2) = teq(2) + omega_ws * vN
f(i,3) = teq(3) + omega_ws * vW
f(i,4) = teq(4) + omega_ws * vS
f(i,5) = teq(5) + omega_wd * vNE
f(i,6) = teq(6) + omega_wd * vNW
f(i,7) = teq(7) + omega_wd * vSW
f(i,8) = teq(8) + omega_wd * vSE
end do
end subroutine
The internal compiler error (ICE) occurs only when I add the `-mp=multicore` flag:
$ nvfortran -O2 -fast -c d2q9_rr.f90
$ nvfortran -O2 -fast -mp=multicore -c d2q9_rr.f90
NVFORTRAN-F-0000-Internal compiler error. stack error while lowering 7 (d2q9_rr.f90: 34)
NVFORTRAN/x86-64 Linux 24.1-0: compilation aborted
I suspected that the Intel Fortran directive `!DIR$ DISTRIBUTE POINT` at line 71 might have something to do with it. And indeed, after I removed the directive, the file compiles.
I couldn’t find any mention of the `!DIR$` directive syntax in the HPC SDK Documentation, so it’s a bit surprising that nvfortran is affected. I would expect it to simply skip the directive or issue a warning about unsupported directive syntax.