Hi.
Long time no see. I’ve been working on something else.
Let’s continue with the code I’ve been talking about through Feburary and March.
After struggling with finding bugs and work arounds, I could finally make my code work, but unfortunately, in debug mode only.
In release mode, the result is incorrect.
Please refer to the code below.
What causes trouble is the indicated worker loop.
As you can see, I’m using worker size of 2.
If I use worker size 1, the result is correct; that is, that worker loop is not properly parallelized in release mode.
Manual privatization of phiobd, PhiAngOut, PhidRay did not help.
What could be the cause?
#include <defines.h>
SUBROUTINE RayTrace_GPU(RayInfo, CoreInfo, phis, PhiAngIn, xst, src, jout, iz, mygb, myge, ljout)
USE PARAM
USE TYPEDEF, ONLY : RayInfo_Type, Coreinfo_type
USE MOC_MOD, ONLY : nMaxRaySeg, nMaxCellRay, nMaxAsyRay, nMaxCoreRay, &
EXPAPolar, EXPBPolar, wtangP0
USE PE_MOD, ONLY : PE, GPUControl
IMPLICIT NONE
TYPE(RayInfo_Type) :: RayInfo
TYPE(CoreInfo_Type) :: CoreInfo
!$ACC DECLARE PRESENT(RayInfo, CoreInfo, GPUControl, EXPAPolar, EXPBPolar, wtangP0)
REAL(DP), POINTER :: phis(:, :), PhiAngIn(:, :, :), xst(:, :), src(:, :), jout(:, :, :, :)
INTEGER :: iz, mygb, myge
LOGICAL :: ljout
INTEGER :: iRay
INTEGER :: i, j, k, l, m, jbeg, jend, jinc, irg, irw, irw1, irv, ig
REAL(DP) :: wt(RayInfo%nPolarAngle), tau
REAL(DP) :: phiobd(Rayinfo%nPolarAngle, mygb : myge), phid, phiocel1, phiocel2
INTEGER :: iazi, ipol, PhiAnginSvIdx, PhiAngOutSvIdx
INTEGER :: nCoreRay, nAsyRay, nPinRay, nRaySeg
INTEGER :: irotray, icoreray, iasyray, iceray, irayseg
INTEGER :: ipin, icelg, icelv, iasy, ireg, isurf1, isurf2, irot, idir, ifsr
INTEGER :: irsegidx, icellrayidx, FsrIdxSt
INTEGER :: mp(2) = (/ 2, 1 /)
! Tracking Data Storages
INTEGER :: nTotRaySeg(nMaxCoreRay), nTotCellRay(nMaxCoreRay)
INTEGER :: CellRayIdxSt(nMaxCellRay, nMaxCoreRay, 2)
INTEGER :: PinIdx(nMaxCellRay, nMaxCoreRay)
INTEGER :: SurfIdx(nMaxCellRay, nMaxCoreRay, 2)
INTEGER :: ExpAppIdx(mygb : myge, nMaxRaySeg, nMaxCoreRay)
INTEGER :: FsrIdx(nMaxRaySeg, nMaxCoreRay)
REAL(DP) :: ExpApp(RayInfo%nPolarAngle, mygb : myge, nMaxRaySeg, nMaxCoreRay)
REAL(DP) :: OptLenList(mygb : myge, nMaxRaySeg, nMaxCoreRay)
REAL(DP) :: PhiAngOut(RayInfo%nPolarAngle, mygb : myge, nMaxRaySeg + 2)
REAL(DP) :: PhidRay(RayInfo%nPolarAngle, mygb : myge, nMaxRaySeg)
!$ACC ENTER DATA COPYIN(xst(mygb : myge, :), src(mygb : myge, :), PhiAngIn(:, mygb : myge, :))
!$ACC ENTER DATA CREATE(phis(mygb : myge, :), Jout(mygb : myge, :, :, :))
!$ACC DATA PRESENT(phis(mygb : myge, :), Jout(mygb : myge, :, :, :))
!$ACC KERNELS
phis(mygb : myge, :) = 0
!$ACC END KERNELS
IF (ljout) THEN
!$ACC KERNELS
jout(mygb : myge, :, :, :) = 0
!$ACC END KERNELS
ENDIF
!$ACC END DATA
!$ACC DATA PRESENT(xst(mygb : myge, :), src(mygb : myge, :), PhiAngIn(:, mygb : myge, :), &
!$ACC phis(mygb : myge, :), Jout(mygb : myge, :, :, :))
!$ACC PARALLEL NUM_GANGS(GPUControl(1)%nGang) NUM_WORKERS(2) VECTOR_LENGTH(GPUControl(1)%nVector)
!$ACC LOOP INDEPENDENT GANG PRIVATE(irsegidx, icellrayidx, nTotRaySeg, nTotCellRay, CellRayIdxSt, &
!$ACC PinIdx, SurfIdx, ExpAppIdx, FsrIdx, ExpApp, OptLenList)
DO iRay = 1, GPUControl(1)%nRay
!$ACC CACHE(nTotRaySeg, nTotCellRay)
iRotRay = GPUControl(1)%RayList(iRay)
!!!!!!!!!!!!!!!! Inlined Tracking Subroutine !!!!!!!!!!!!!!!!
nCoreRay = RayInfo%RotRay(iRotRay)%nRay
!$ACC LOOP SEQ
DO j = 1, nCoreRay
irsegidx = 0; icellrayidx = 0
iCoreRay = RayInfo%RotRay(iRotRay)%RayIdx(j)
nAsyRay = RayInfo%CoreRay(iCoreRay)%nRay
!$ACC LOOP SEQ
DO k = 1, nAsyRay
iasyray = RayInfo%CoreRay(iCoreRay)%AsyRayIdx(k)
iasy = RayInfo%CoreRay(iCoreRay)%AsyIdx(k)
IF(iasy .EQ. 0) CYCLE
nPinRay = RayInfo%AsyRay(iAsyRay)%nCellRay
!$ACC LOOP SEQ
DO l = 1, nPinRay
ipin = RayInfo%AsyRay(iAsyRay)%PinIdx(l)
iceray = RayInfo%AsyRay(iAsyRay)%PinRayIdx(l)
ipin = CoreInfo%Asy(iAsy)%GlobalPinIdx(ipin)
icelg = CoreInfo%Pin(ipin)%Cell(iz)
FsrIdxSt = CoreInfo%Pin(ipin)%FsrIdxSt
irg = icellrayidx + l
PinIdx(irg, j) = ipin
CellRayIdxSt(irg, j, 2) = irsegidx + 1
nRaySeg = CoreInfo%CellInfo(icelg)%CellRay(iceray)%nSeg
!$ACC LOOP INDEPENDENT WORKER VECTOR
DO iRaySeg = 1, nRaySeg
irv = irsegidx + iRaySeg
ireg = FsrIdxSt + CoreInfo%CellInfo(icelg)%CellRay(iceray)%LocalFsrIdx(iRaySeg) - 1
FsrIdx(irv, j) = ireg
!$ACC LOOP SEQ
DO ig = mygb, myge
tau = - CoreInfo%CellInfo(icelg)%CellRay(iceray)%LenSeg(iRaySeg) * xst(ig, ireg)
OptLenList(ig, irv, j) = tau
ExpAppIdx(ig, irv, j) = min(0, max(INT(tau), -40000))
ENDDO
ENDDO
irsegidx = irsegidx + nRaySeg
CellRayIdxSt(irg, j, 1) = irsegidx
SurfIdx(irg, j, 1) = RayInfo%AsyRay(iAsyRay)%PinRaySurf(2, l)
SurfIdx(irg, j, 2) = RayInfo%AsyRay(iAsyRay)%PinRaySurf(1, l)
ENDDO
icellrayidx = icellrayidx + nPinRay
ENDDO
nTotRaySeg(j) = irsegidx
nTotCellRay(j) = icellRayIdx
!$ACC LOOP INDEPENDENT WORKER
DO iRaySeg = 1, nTotRaySeg(j)
!$ACC LOOP INDEPENDENT COLLAPSE(2) VECTOR
DO ig = mygb, myge
DO ipol = 1, RayInfo%nPolarAngle
ExpApp(ipol, ig, iRaySeg, j) &
= EXPAPolar(ipol, ExpAppIdx(ig, iRaySeg, j)) * OptLenList(ig, iRaySeg, j) + EXPBPolar(ipol, ExpAppIdx(ig, iRaySeg, j))
ENDDO
ENDDO
ENDDO
ENDDO
!!!!!!! Problem in this worker loop !!!!!!!
!$ACC LOOP INDEPENDENT WORKER PRIVATE(wt, phiobd, PhiAngOut, PhidRay)
DO irot = 1, 2
!$ACC CACHE(wt)
PhiAnginSvIdx = RayInfo%PhiAngInSvIdx(iRotRay, irot)
PhiAngOutSvIdx = RayInfo%PhiangOutSvIdx(iRotRay, irot)
phiobd(:, mygb : myge) = PhiAngIn(:, mygb : myge, PhiAnginSvIdx)
jinc = 1; jbeg = 1; jend = nCoreRay
IF(irot .EQ. 2) THEN
jinc = -1; jbeg = nCoreRay; jend = 1
ENDIF
!$ACC LOOP SEQ
DO j = jbeg, jend, jinc
idir = RayInfo%RotRay(iRotRay)%DIR(j);
iazi = RayInfo%CoreRay(RayInfo%RotRay(iRotRay)%RayIdx(j))%iang
wt(1 : RayInfo%nPolarAngle) = wtangP0(1 : RayInfo%nPolarAngle, iazi)
IF(irot .EQ. 2) idir = mp(idir)
nRaySeg = nTotRaySeg(j)
IF(idir .EQ. 1) THEN
PhiAngOut(:, mygb : myge, 1) = phiobd(:, mygb : myge)
!$ACC LOOP SEQ
DO irw = 1, nRaySeg
!$ACC LOOP INDEPENDENT COLLAPSE(2) VECTOR
DO ig = mygb, myge
DO ipol = 1, RayInfo%nPolarAngle
phid = (PhiAngOut(ipol, ig, irw) - src(ig, FsrIdx(irw, j))) * ExpApp(ipol, ig, irw, j)
PhiAngOut(ipol, ig, irw + 1) = PhiAngOut(ipol, ig, irw) - phid
PhidRay(ipol, ig, irw) = wt(ipol) * phid
ENDDO
ENDDO
ENDDO
phiobd(:, mygb : myge) = PhiAngOut(:, mygb : myge, nRaySeg + 1)
IF(ljout) THEN
!$ACC LOOP INDEPENDENT COLLAPSE(2) VECTOR
DO irv = 1, nTotCellRay(j)
DO ig = mygb, myge
icelv = PinIdx(irv, j); isurf1 = SurfIdx(irv, j, 1); isurf2 = SurfIdx(irv, j, 2)
phiocel1 = 0; phiocel2 = 0
!$ACC LOOP SEQ
DO ipol = 1, RayInfo%nPolarAngle
phiocel1 = phiocel1 + wt(ipol) * PhiAngOut(ipol, ig, CellRayIdxSt(irv, j, 1) + 1)
phiocel2 = phiocel2 + wt(ipol) * PhiAngOut(ipol, ig, CellRayIdxSt(irv, j, 2))
ENDDO
!$ACC ATOMIC UPDATE
Jout(ig, 2, isurf1, icelv) = Jout(ig, 2, isurf1, icelv) + phiocel1
!$ACC END ATOMIC
!$ACC ATOMIC UPDATE
Jout(ig, 1, isurf2, icelv) = Jout(ig, 1, isurf2, icelv) + phiocel2
!$ACC END ATOMIC
ENDDO
ENDDO
ENDIF
ELSE
PhiAngOut(:, mygb : myge, nRaySeg + 2) = phiobd(:, mygb : myge)
!$ACC LOOP SEQ
DO irw = nRaySeg, 1, -1
!$ACC LOOP INDEPENDENT COLLAPSE(2) VECTOR
DO ig = mygb, myge
DO ipol = 1, RayInfo%nPolarAngle
phid = (PhiAngOut(ipol, ig, irw + 2) - src(ig, FsrIdx(irw, j))) * ExpApp(ipol, ig, irw, j)
PhiAngOut(ipol, ig, irw + 1) = PhiAngOut(ipol, ig, irw + 2) - phid
PhidRay(ipol, ig, irw) = wt(ipol) * phid
ENDDO
ENDDO
ENDDO
phiobd(:, mygb : myge) = PhiAngOut(:, mygb : myge, 2)
IF(lJout) THEN
!$ACC LOOP INDEPENDENT COLLAPSE(2) VECTOR
DO irv = 1, nTotCellRay(j)
DO ig = mygb, myge
icelv = PinIdx(irv, j); isurf1 = SurfIdx(irv, j, 1); isurf2 = SurfIdx(irv, j, 2)
phiocel1 = 0; phiocel2 = 0
!$ACC LOOP SEQ
DO ipol = 1, RayInfo%nPolarAngle
phiocel1 = phiocel1 + wt(ipol) * PhiAngOut(ipol, ig, CellRayIdxSt(irv, j, 1) + 2)
phiocel2 = phiocel2 + wt(ipol) * PhiAngOut(ipol, ig, CellRayIdxSt(irv, j, 2) + 1)
ENDDO
!$ACC ATOMIC UPDATE
Jout(ig, 1, isurf1, icelv) = Jout(ig, 1, isurf1, icelv) + phiocel1
!$ACC END ATOMIC
!$ACC ATOMIC UPDATE
Jout(ig, 2, isurf2, icelv) = Jout(ig, 2, isurf2, icelv) + phiocel2
!$ACC END ATOMIC
ENDDO
ENDDO
ENDIF
ENDIF
!$ACC LOOP INDEPENDENT COLLAPSE(2) VECTOR
DO irv = 1, nRaySeg
DO ig = mygb, myge
!$ACC LOOP SEQ
DO ipol = 1, RayInfo%nPolarAngle
!$ACC ATOMIC UPDATE
phis(ig, FsrIdx(irv, j)) = phis(ig, FsrIdx(irv, j)) + PhidRay(ipol, ig, irv)
!$ACC END ATOMIC
ENDDO
ENDDO
ENDDO
ENDDO
PhiAngIn(:, mygb : myge, PhiAngOutSvIdx) = phiobd(:, mygb : myge)
ENDDO
!!!!!!!!!!!!!!!! Inlined Tracking Subroutine !!!!!!!!!!!!!!!!
ENDDO
!$ACC END PARALLEL
!$ACC END DATA
!$ACC DATA PRESENT(xst(mygb : myge, :), src(mygb : myge, :), phis(mygb : myge, :))
!$ACC PARALLEL
!$ACC LOOP INDEPENDENT GANG
DO j = 1, CoreInfo%nxy
FsrIdxSt = CoreInfo%Pin(j)%FsrIdxSt; icelg = CoreInfo%Pin(j)%Cell(iz)
!$ACC LOOP INDEPENDENT COLLAPSE(2) WORKER VECTOR
DO i = 1, CoreInfo%CellInfo(icelg)%nFsr
DO ig = mygb, myge
ireg = FsrIdxSt + i - 1
phis(ig, ireg) = phis(ig, ireg) / (xst(ig, ireg) * CoreInfo%CellInfo(icelg)%vol(i)) + src(ig, ireg)
ENDDO
ENDDO
ENDDO
!$ACC END PARALLEL
!$ACC END DATA
!$ACC EXIT DATA DELETE(xst(mygb : myge, :), src(mygb : myge, :))
!$ACC EXIT DATA COPYOUT(phis(mygb : myge, :), Jout(mygb : myge, :, :, :), PhiAngIn(:, mygb : myge, :))
PAUSE
END SUBROUTINE