Correct in debug, but incorrect in release

Hi.

Long time no see. I’ve been working on something else.

Let’s continue with the code I’ve been talking about through Feburary and March.

After struggling with finding bugs and work arounds, I could finally make my code work, but unfortunately, in debug mode only.

In release mode, the result is incorrect.

Please refer to the code below.

What causes trouble is the indicated worker loop.

As you can see, I’m using worker size of 2.

If I use worker size 1, the result is correct; that is, that worker loop is not properly parallelized in release mode.

Manual privatization of phiobd, PhiAngOut, PhidRay did not help.

What could be the cause?

#include <defines.h>
SUBROUTINE RayTrace_GPU(RayInfo, CoreInfo, phis, PhiAngIn, xst, src, jout, iz, mygb, myge, ljout)
USE PARAM
USE TYPEDEF, ONLY : RayInfo_Type, Coreinfo_type
USE MOC_MOD, ONLY : nMaxRaySeg,     nMaxCellRay,    nMaxAsyRay,     nMaxCoreRay,    &
                    EXPAPolar,      EXPBPolar,      wtangP0
USE PE_MOD,  ONLY : PE, GPUControl
IMPLICIT NONE
TYPE(RayInfo_Type) :: RayInfo
TYPE(CoreInfo_Type) :: CoreInfo
!$ACC DECLARE PRESENT(RayInfo, CoreInfo, GPUControl, EXPAPolar, EXPBPolar, wtangP0)
REAL(DP), POINTER :: phis(:, :), PhiAngIn(:, :, :), xst(:, :), src(:, :), jout(:, :, :, :)
INTEGER :: iz, mygb, myge
LOGICAL :: ljout

INTEGER :: iRay
INTEGER :: i, j, k, l, m, jbeg, jend, jinc, irg, irw, irw1, irv, ig

REAL(DP) :: wt(RayInfo%nPolarAngle), tau
REAL(DP) :: phiobd(Rayinfo%nPolarAngle, mygb : myge), phid, phiocel1, phiocel2
INTEGER :: iazi, ipol, PhiAnginSvIdx, PhiAngOutSvIdx
INTEGER :: nCoreRay, nAsyRay, nPinRay, nRaySeg
INTEGER :: irotray, icoreray, iasyray, iceray, irayseg
INTEGER :: ipin, icelg, icelv, iasy, ireg, isurf1, isurf2, irot, idir, ifsr
INTEGER :: irsegidx, icellrayidx, FsrIdxSt

INTEGER :: mp(2) = (/ 2, 1 /)

! Tracking Data Storages
INTEGER :: nTotRaySeg(nMaxCoreRay), nTotCellRay(nMaxCoreRay)
INTEGER :: CellRayIdxSt(nMaxCellRay, nMaxCoreRay, 2)
INTEGER :: PinIdx(nMaxCellRay, nMaxCoreRay)
INTEGER :: SurfIdx(nMaxCellRay, nMaxCoreRay, 2)
INTEGER :: ExpAppIdx(mygb : myge, nMaxRaySeg, nMaxCoreRay)
INTEGER :: FsrIdx(nMaxRaySeg, nMaxCoreRay)
REAL(DP) :: ExpApp(RayInfo%nPolarAngle, mygb : myge, nMaxRaySeg, nMaxCoreRay)
REAL(DP) :: OptLenList(mygb : myge, nMaxRaySeg, nMaxCoreRay)
REAL(DP) :: PhiAngOut(RayInfo%nPolarAngle, mygb : myge, nMaxRaySeg + 2)
REAL(DP) :: PhidRay(RayInfo%nPolarAngle, mygb : myge, nMaxRaySeg)

!$ACC ENTER DATA COPYIN(xst(mygb : myge, :), src(mygb : myge, :), PhiAngIn(:, mygb : myge, :))
!$ACC ENTER DATA CREATE(phis(mygb : myge, :), Jout(mygb : myge, :, :, :))

!$ACC DATA PRESENT(phis(mygb : myge, :), Jout(mygb : myge, :, :, :))
!$ACC KERNELS
  phis(mygb : myge, :) = 0
!$ACC END KERNELS
IF (ljout) THEN
  !$ACC KERNELS
    jout(mygb : myge, :, :, :) = 0
  !$ACC END KERNELS
ENDIF
!$ACC END DATA

!$ACC DATA PRESENT(xst(mygb : myge, :), src(mygb : myge, :), PhiAngIn(:, mygb : myge, :),                        &
!$ACC              phis(mygb : myge, :), Jout(mygb : myge, :, :, :))
!$ACC PARALLEL NUM_GANGS(GPUControl(1)%nGang) NUM_WORKERS(2) VECTOR_LENGTH(GPUControl(1)%nVector)
!$ACC LOOP INDEPENDENT GANG PRIVATE(irsegidx, icellrayidx, nTotRaySeg, nTotCellRay, CellRayIdxSt,                &
!$ACC                               PinIdx, SurfIdx, ExpAppIdx, FsrIdx, ExpApp, OptLenList)
DO iRay = 1, GPUControl(1)%nRay
  !$ACC CACHE(nTotRaySeg, nTotCellRay)
  iRotRay = GPUControl(1)%RayList(iRay)

  !!!!!!!!!!!!!!!! Inlined Tracking Subroutine !!!!!!!!!!!!!!!!
      
  nCoreRay = RayInfo%RotRay(iRotRay)%nRay
  !$ACC LOOP SEQ
  DO j = 1, nCoreRay         
    irsegidx = 0; icellrayidx = 0
    iCoreRay = RayInfo%RotRay(iRotRay)%RayIdx(j)
    nAsyRay = RayInfo%CoreRay(iCoreRay)%nRay
    !$ACC LOOP SEQ
    DO k = 1, nAsyRay 
      iasyray = RayInfo%CoreRay(iCoreRay)%AsyRayIdx(k)
      iasy = RayInfo%CoreRay(iCoreRay)%AsyIdx(k)
      IF(iasy .EQ. 0) CYCLE
      nPinRay = RayInfo%AsyRay(iAsyRay)%nCellRay
      !$ACC LOOP SEQ
      DO l = 1, nPinRay
        ipin = RayInfo%AsyRay(iAsyRay)%PinIdx(l)
        iceray = RayInfo%AsyRay(iAsyRay)%PinRayIdx(l)
        ipin = CoreInfo%Asy(iAsy)%GlobalPinIdx(ipin)
        icelg = CoreInfo%Pin(ipin)%Cell(iz)
        FsrIdxSt = CoreInfo%Pin(ipin)%FsrIdxSt
        irg = icellrayidx + l
        PinIdx(irg, j) = ipin
        CellRayIdxSt(irg, j, 2) = irsegidx + 1
        nRaySeg = CoreInfo%CellInfo(icelg)%CellRay(iceray)%nSeg
        !$ACC LOOP INDEPENDENT WORKER VECTOR
        DO iRaySeg = 1, nRaySeg
          irv = irsegidx + iRaySeg
          ireg = FsrIdxSt + CoreInfo%CellInfo(icelg)%CellRay(iceray)%LocalFsrIdx(iRaySeg) - 1
          FsrIdx(irv, j) = ireg
          !$ACC LOOP SEQ
          DO ig = mygb, myge
            tau = - CoreInfo%CellInfo(icelg)%CellRay(iceray)%LenSeg(iRaySeg) * xst(ig, ireg)
            OptLenList(ig, irv, j) = tau
            ExpAppIdx(ig, irv, j) = min(0, max(INT(tau), -40000))
          ENDDO
        ENDDO
        irsegidx = irsegidx + nRaySeg
        CellRayIdxSt(irg, j, 1) = irsegidx
        SurfIdx(irg, j, 1) = RayInfo%AsyRay(iAsyRay)%PinRaySurf(2, l)
        SurfIdx(irg, j, 2) = RayInfo%AsyRay(iAsyRay)%PinRaySurf(1, l)
      ENDDO
      icellrayidx = icellrayidx + nPinRay
    ENDDO
    nTotRaySeg(j) = irsegidx
    nTotCellRay(j) = icellRayIdx
    !$ACC LOOP INDEPENDENT WORKER
    DO iRaySeg = 1, nTotRaySeg(j)
      !$ACC LOOP INDEPENDENT COLLAPSE(2) VECTOR
      DO ig = mygb, myge
        DO ipol = 1, RayInfo%nPolarAngle
          ExpApp(ipol, ig, iRaySeg, j) &
          = EXPAPolar(ipol, ExpAppIdx(ig, iRaySeg, j)) * OptLenList(ig, iRaySeg, j) + EXPBPolar(ipol, ExpAppIdx(ig, iRaySeg, j))
        ENDDO
      ENDDO
    ENDDO
  ENDDO

  !!!!!!! Problem in this worker loop !!!!!!!

  !$ACC LOOP INDEPENDENT WORKER PRIVATE(wt, phiobd, PhiAngOut, PhidRay)
  DO irot = 1, 2
    !$ACC CACHE(wt)
    PhiAnginSvIdx = RayInfo%PhiAngInSvIdx(iRotRay, irot)
    PhiAngOutSvIdx = RayInfo%PhiangOutSvIdx(iRotRay, irot)
    phiobd(:, mygb : myge) = PhiAngIn(:, mygb : myge, PhiAnginSvIdx)
    jinc = 1; jbeg = 1; jend = nCoreRay
    IF(irot .EQ. 2) THEN
      jinc = -1; jbeg = nCoreRay; jend = 1
    ENDIF
    !$ACC LOOP SEQ
    DO j = jbeg, jend, jinc
      idir = RayInfo%RotRay(iRotRay)%DIR(j);
      iazi = RayInfo%CoreRay(RayInfo%RotRay(iRotRay)%RayIdx(j))%iang
      wt(1 : RayInfo%nPolarAngle) = wtangP0(1 : RayInfo%nPolarAngle, iazi)
      IF(irot .EQ. 2) idir = mp(idir)
      nRaySeg = nTotRaySeg(j)   
      IF(idir .EQ. 1) THEN
        PhiAngOut(:, mygb : myge, 1) = phiobd(:, mygb : myge)
        !$ACC LOOP SEQ
        DO irw = 1, nRaySeg
          !$ACC LOOP INDEPENDENT COLLAPSE(2) VECTOR
          DO ig = mygb, myge
            DO ipol = 1, RayInfo%nPolarAngle
              phid = (PhiAngOut(ipol, ig, irw) - src(ig, FsrIdx(irw, j))) * ExpApp(ipol, ig, irw, j)
              PhiAngOut(ipol, ig, irw + 1) = PhiAngOut(ipol, ig, irw) - phid
              PhidRay(ipol, ig, irw) = wt(ipol) * phid
            ENDDO
          ENDDO
        ENDDO
        phiobd(:, mygb : myge) = PhiAngOut(:, mygb : myge, nRaySeg + 1)
        IF(ljout) THEN
          !$ACC LOOP INDEPENDENT COLLAPSE(2) VECTOR
          DO irv = 1, nTotCellRay(j)
            DO ig = mygb, myge
              icelv = PinIdx(irv, j); isurf1 = SurfIdx(irv, j, 1); isurf2 = SurfIdx(irv, j, 2)
              phiocel1 = 0; phiocel2 = 0
              !$ACC LOOP SEQ
              DO ipol = 1, RayInfo%nPolarAngle
                phiocel1 = phiocel1 + wt(ipol) * PhiAngOut(ipol, ig, CellRayIdxSt(irv, j, 1) + 1)
                phiocel2 = phiocel2 + wt(ipol) * PhiAngOut(ipol, ig, CellRayIdxSt(irv, j, 2))
              ENDDO
              !$ACC ATOMIC UPDATE
              Jout(ig, 2, isurf1, icelv) = Jout(ig, 2, isurf1, icelv) + phiocel1
              !$ACC END ATOMIC
              !$ACC ATOMIC UPDATE
              Jout(ig, 1, isurf2, icelv) = Jout(ig, 1, isurf2, icelv) + phiocel2
              !$ACC END ATOMIC
            ENDDO
          ENDDO
        ENDIF
      ELSE
        PhiAngOut(:, mygb : myge, nRaySeg + 2) = phiobd(:, mygb : myge)
        !$ACC LOOP SEQ
        DO irw = nRaySeg, 1, -1
          !$ACC LOOP INDEPENDENT COLLAPSE(2) VECTOR
          DO ig = mygb, myge
            DO ipol = 1, RayInfo%nPolarAngle
              phid = (PhiAngOut(ipol, ig, irw + 2) - src(ig, FsrIdx(irw, j))) * ExpApp(ipol, ig, irw, j)
              PhiAngOut(ipol, ig, irw + 1) = PhiAngOut(ipol, ig, irw + 2) - phid
              PhidRay(ipol, ig, irw) = wt(ipol) * phid
            ENDDO
          ENDDO
        ENDDO
        phiobd(:, mygb : myge) = PhiAngOut(:, mygb : myge, 2)
        IF(lJout) THEN
          !$ACC LOOP INDEPENDENT COLLAPSE(2) VECTOR
          DO irv = 1, nTotCellRay(j)
            DO ig = mygb, myge
              icelv = PinIdx(irv, j); isurf1 = SurfIdx(irv, j, 1); isurf2 = SurfIdx(irv, j, 2)
              phiocel1 = 0; phiocel2 = 0
              !$ACC LOOP SEQ
              DO ipol = 1, RayInfo%nPolarAngle
                phiocel1 = phiocel1 + wt(ipol) * PhiAngOut(ipol, ig, CellRayIdxSt(irv, j, 1) + 2)
                phiocel2 = phiocel2 + wt(ipol) * PhiAngOut(ipol, ig, CellRayIdxSt(irv, j, 2) + 1)
              ENDDO
              !$ACC ATOMIC UPDATE
              Jout(ig, 1, isurf1, icelv) = Jout(ig, 1, isurf1, icelv) + phiocel1
              !$ACC END ATOMIC
              !$ACC ATOMIC UPDATE
              Jout(ig, 2, isurf2, icelv) = Jout(ig, 2, isurf2, icelv) + phiocel2
              !$ACC END ATOMIC
            ENDDO
          ENDDO
        ENDIF
      ENDIF
      !$ACC LOOP INDEPENDENT COLLAPSE(2) VECTOR
      DO irv = 1, nRaySeg
        DO ig = mygb, myge
          !$ACC LOOP SEQ
          DO ipol = 1, RayInfo%nPolarAngle
            !$ACC ATOMIC UPDATE
            phis(ig, FsrIdx(irv, j)) = phis(ig, FsrIdx(irv, j)) + PhidRay(ipol, ig, irv)
            !$ACC END ATOMIC
          ENDDO
        ENDDO
      ENDDO
    ENDDO
    PhiAngIn(:, mygb : myge, PhiAngOutSvIdx) = phiobd(:, mygb : myge)
  ENDDO

  !!!!!!!!!!!!!!!! Inlined Tracking Subroutine !!!!!!!!!!!!!!!!

ENDDO  
!$ACC END PARALLEL
!$ACC END DATA

!$ACC DATA PRESENT(xst(mygb : myge, :), src(mygb : myge, :), phis(mygb : myge, :))
!$ACC PARALLEL
!$ACC LOOP INDEPENDENT GANG
DO j = 1, CoreInfo%nxy
  FsrIdxSt = CoreInfo%Pin(j)%FsrIdxSt; icelg = CoreInfo%Pin(j)%Cell(iz)
  !$ACC LOOP INDEPENDENT COLLAPSE(2) WORKER VECTOR
  DO i = 1, CoreInfo%CellInfo(icelg)%nFsr
    DO ig = mygb, myge
      ireg = FsrIdxSt + i - 1
      phis(ig, ireg) = phis(ig, ireg) / (xst(ig, ireg) * CoreInfo%CellInfo(icelg)%vol(i)) + src(ig, ireg)
    ENDDO
  ENDDO
ENDDO
!$ACC END PARALLEL
!$ACC END DATA

!$ACC EXIT DATA DELETE(xst(mygb : myge, :), src(mygb : myge, :))
!$ACC EXIT DATA COPYOUT(phis(mygb : myge, :), Jout(mygb : myge, :, :, :), PhiAngIn(:, mygb : myge, :))

PAUSE

END SUBROUTINE

Hi CNJ,

I suspect that there’s some type of data race going on but given the complex structure of the code it’s hard for me to tell exactly what’s wrong given just this snipit. If can, please send a reproducing example to PGI Customer Service (trs@pgroup.com) and I’ll see if I can find the issue.

Thanks,
Mat

It’s hard to reproduce an example, because the behavior of the compiler is unpredictable.

I removed some sections that are unrelated with errors to simplify a little bit.

Please see if you can find some problems.

I presume that some optimization options are causing errors given that it runs correctly in debug mode.

Are there any default OpenACC options used in release mode which are not used in debug mode?

I will try removing those options.

#include <defines.h> 
SUBROUTINE RayTrace_GPU(RayInfo, CoreInfo, phis, PhiAngIn, xst, src, jout, iz, mygb, myge, ljout) 
USE PARAM 
USE TYPEDEF, ONLY : RayInfo_Type, Coreinfo_type 
USE MOC_MOD, ONLY : nMaxRaySeg,     nMaxCellRay,    nMaxAsyRay,     nMaxCoreRay,    & 
                    EXPAPolar,      EXPBPolar,      wtangP0 
USE PE_MOD,  ONLY : PE, GPUControl 
IMPLICIT NONE 
TYPE(RayInfo_Type) :: RayInfo 
TYPE(CoreInfo_Type) :: CoreInfo 
!$ACC DECLARE PRESENT(RayInfo, CoreInfo, GPUControl, EXPAPolar, EXPBPolar, wtangP0) 
REAL(DP), POINTER :: phis(:, :), PhiAngIn(:, :, :), xst(:, :), src(:, :), jout(:, :, :, :) 
INTEGER :: iz, mygb, myge 
LOGICAL :: ljout 

INTEGER :: iRay 
INTEGER :: i, j, k, l, m, jbeg, jend, jinc, irg, irw, irw1, irv, ig 

REAL(DP) :: wt(RayInfo%nPolarAngle), tau 
REAL(DP) :: phiobd(Rayinfo%nPolarAngle, mygb : myge), phid, phiocel1, phiocel2 
INTEGER :: iazi, ipol, PhiAnginSvIdx, PhiAngOutSvIdx 
INTEGER :: nCoreRay, nAsyRay, nPinRay, nRaySeg 
INTEGER :: irotray, icoreray, iasyray, iceray, irayseg 
INTEGER :: ipin, icelg, icelv, iasy, ireg, isurf1, isurf2, irot, idir, ifsr 
INTEGER :: irsegidx, icellrayidx, FsrIdxSt 

INTEGER :: mp(2) = (/ 2, 1 /) 

! Tracking Data Storages 
INTEGER :: nTotRaySeg(nMaxCoreRay), nTotCellRay(nMaxCoreRay) 
INTEGER :: CellRayIdxSt(nMaxCellRay, nMaxCoreRay, 2) 
INTEGER :: PinIdx(nMaxCellRay, nMaxCoreRay) 
INTEGER :: SurfIdx(nMaxCellRay, nMaxCoreRay, 2) 
INTEGER :: ExpAppIdx(mygb : myge, nMaxRaySeg, nMaxCoreRay) 
INTEGER :: FsrIdx(nMaxRaySeg, nMaxCoreRay) 
REAL(DP) :: ExpApp(RayInfo%nPolarAngle, mygb : myge, nMaxRaySeg, nMaxCoreRay) 
REAL(DP) :: OptLenList(mygb : myge, nMaxRaySeg, nMaxCoreRay) 
REAL(DP) :: PhiAngOut(RayInfo%nPolarAngle, mygb : myge, nMaxRaySeg + 2) 
REAL(DP) :: PhidRay(RayInfo%nPolarAngle, mygb : myge, nMaxRaySeg) 

!$ACC ENTER DATA COPYIN(xst(mygb : myge, :), src(mygb : myge, :), PhiAngIn(:, mygb : myge, :)) 
!$ACC ENTER DATA CREATE(phis(mygb : myge, :), Jout(mygb : myge, :, :, :)) 

!$ACC DATA PRESENT(phis(mygb : myge, :), Jout(mygb : myge, :, :, :)) 
!$ACC KERNELS 
  phis(mygb : myge, :) = 0 
!$ACC END KERNELS 
IF (ljout) THEN 
  !$ACC KERNELS 
    jout(mygb : myge, :, :, :) = 0 
  !$ACC END KERNELS 
ENDIF 
!$ACC END DATA 

!$ACC DATA PRESENT(xst(mygb : myge, :), src(mygb : myge, :), PhiAngIn(:, mygb : myge, :),                        & 
!$ACC              phis(mygb : myge, :), Jout(mygb : myge, :, :, :)) 
!$ACC PARALLEL NUM_GANGS(GPUControl(1)%nGang) NUM_WORKERS(2) VECTOR_LENGTH(GPUControl(1)%nVector) 
!$ACC LOOP INDEPENDENT GANG PRIVATE(irsegidx, icellrayidx, nTotRaySeg, nTotCellRay, CellRayIdxSt,                & 
!$ACC                               PinIdx, SurfIdx, ExpAppIdx, FsrIdx, ExpApp, OptLenList) 
DO iRay = 1, GPUControl(1)%nRay 
  !$ACC CACHE(nTotRaySeg, nTotCellRay) 
  iRotRay = GPUControl(1)%RayList(iRay) 

  !!!!!!!!!!!!!!!! Inlined Tracking Subroutine !!!!!!!!!!!!!!!! 
      
  nCoreRay = RayInfo%RotRay(iRotRay)%nRay 
  !$ACC LOOP SEQ 
  DO j = 1, nCoreRay          
    irsegidx = 0; icellrayidx = 0 
    iCoreRay = RayInfo%RotRay(iRotRay)%RayIdx(j) 
    nAsyRay = RayInfo%CoreRay(iCoreRay)%nRay 
    !$ACC LOOP SEQ 
    DO k = 1, nAsyRay 
      iasyray = RayInfo%CoreRay(iCoreRay)%AsyRayIdx(k) 
      iasy = RayInfo%CoreRay(iCoreRay)%AsyIdx(k) 
      IF(iasy .EQ. 0) CYCLE 
      nPinRay = RayInfo%AsyRay(iAsyRay)%nCellRay 
      !$ACC LOOP SEQ 
      DO l = 1, nPinRay 
        ipin = RayInfo%AsyRay(iAsyRay)%PinIdx(l) 
        iceray = RayInfo%AsyRay(iAsyRay)%PinRayIdx(l) 
        ipin = CoreInfo%Asy(iAsy)%GlobalPinIdx(ipin) 
        icelg = CoreInfo%Pin(ipin)%Cell(iz) 
        FsrIdxSt = CoreInfo%Pin(ipin)%FsrIdxSt 
        irg = icellrayidx + l 
        PinIdx(irg, j) = ipin 
        CellRayIdxSt(irg, j, 2) = irsegidx + 1 
        nRaySeg = CoreInfo%CellInfo(icelg)%CellRay(iceray)%nSeg 
        !$ACC LOOP INDEPENDENT WORKER VECTOR 
        DO iRaySeg = 1, nRaySeg 
          irv = irsegidx + iRaySeg 
          ireg = FsrIdxSt + CoreInfo%CellInfo(icelg)%CellRay(iceray)%LocalFsrIdx(iRaySeg) - 1 
          FsrIdx(irv, j) = ireg 
          !$ACC LOOP SEQ 
          DO ig = mygb, myge 
            tau = - CoreInfo%CellInfo(icelg)%CellRay(iceray)%LenSeg(iRaySeg) * xst(ig, ireg) 
            OptLenList(ig, irv, j) = tau 
            ExpAppIdx(ig, irv, j) = min(0, max(INT(tau), -40000)) 
          ENDDO 
        ENDDO 
        irsegidx = irsegidx + nRaySeg 
        CellRayIdxSt(irg, j, 1) = irsegidx 
        SurfIdx(irg, j, 1) = RayInfo%AsyRay(iAsyRay)%PinRaySurf(2, l) 
        SurfIdx(irg, j, 2) = RayInfo%AsyRay(iAsyRay)%PinRaySurf(1, l) 
      ENDDO 
      icellrayidx = icellrayidx + nPinRay 
    ENDDO 
    nTotRaySeg(j) = irsegidx 
    nTotCellRay(j) = icellRayIdx 
    !$ACC LOOP INDEPENDENT WORKER 
    DO iRaySeg = 1, nTotRaySeg(j) 
      !$ACC LOOP INDEPENDENT COLLAPSE(2) VECTOR 
      DO ig = mygb, myge 
        DO ipol = 1, RayInfo%nPolarAngle 
          ExpApp(ipol, ig, iRaySeg, j) & 
          = EXPAPolar(ipol, ExpAppIdx(ig, iRaySeg, j)) * OptLenList(ig, iRaySeg, j) + EXPBPolar(ipol, ExpAppIdx(ig, iRaySeg, j)) 
        ENDDO 
      ENDDO 
    ENDDO 
  ENDDO 

  !!!!!!! Problem in this worker loop !!!!!!! 

  !$ACC LOOP INDEPENDENT WORKER PRIVATE(wt, phiobd, PhiAngOut, PhidRay) 
  DO irot = 1, 2 
    !$ACC CACHE(wt) 
    PhiAnginSvIdx = RayInfo%PhiAngInSvIdx(iRotRay, irot) 
    PhiAngOutSvIdx = RayInfo%PhiangOutSvIdx(iRotRay, irot) 
    phiobd(:, mygb : myge) = PhiAngIn(:, mygb : myge, PhiAnginSvIdx) 
    jinc = 1; jbeg = 1; jend = nCoreRay 
    IF(irot .EQ. 2) THEN 
      jinc = -1; jbeg = nCoreRay; jend = 1 
    ENDIF 
    !$ACC LOOP SEQ 
    DO j = jbeg, jend, jinc 
      idir = RayInfo%RotRay(iRotRay)%DIR(j); 
      iazi = RayInfo%CoreRay(RayInfo%RotRay(iRotRay)%RayIdx(j))%iang 
      wt(1 : RayInfo%nPolarAngle) = wtangP0(1 : RayInfo%nPolarAngle, iazi) 
      IF(irot .EQ. 2) idir = mp(idir) 
      nRaySeg = nTotRaySeg(j)    
      IF(idir .EQ. 1) THEN 
        PhiAngOut(:, mygb : myge, 1) = phiobd(:, mygb : myge) 
        !$ACC LOOP SEQ 
        DO irw = 1, nRaySeg 
          !$ACC LOOP INDEPENDENT COLLAPSE(2) VECTOR 
          DO ig = mygb, myge 
            DO ipol = 1, RayInfo%nPolarAngle 
              phid = (PhiAngOut(ipol, ig, irw) - src(ig, FsrIdx(irw, j))) * ExpApp(ipol, ig, irw, j) 
              PhiAngOut(ipol, ig, irw + 1) = PhiAngOut(ipol, ig, irw) - phid 
              PhidRay(ipol, ig, irw) = wt(ipol) * phid 
            ENDDO 
          ENDDO 
        ENDDO 
        phiobd(:, mygb : myge) = PhiAngOut(:, mygb : myge, nRaySeg + 1) 
      ELSE 
        PhiAngOut(:, mygb : myge, nRaySeg + 2) = phiobd(:, mygb : myge) 
        !$ACC LOOP SEQ 
        DO irw = nRaySeg, 1, -1 
          !$ACC LOOP INDEPENDENT COLLAPSE(2) VECTOR 
          DO ig = mygb, myge 
            DO ipol = 1, RayInfo%nPolarAngle 
              phid = (PhiAngOut(ipol, ig, irw + 2) - src(ig, FsrIdx(irw, j))) * ExpApp(ipol, ig, irw, j) 
              PhiAngOut(ipol, ig, irw + 1) = PhiAngOut(ipol, ig, irw + 2) - phid 
              PhidRay(ipol, ig, irw) = wt(ipol) * phid 
            ENDDO 
          ENDDO 
        ENDDO 
        phiobd(:, mygb : myge) = PhiAngOut(:, mygb : myge, 2) 
      ENDIF 
      !$ACC LOOP INDEPENDENT COLLAPSE(2) VECTOR 
      DO irv = 1, nRaySeg 
        DO ig = mygb, myge 
          !$ACC LOOP SEQ 
          DO ipol = 1, RayInfo%nPolarAngle 
            !$ACC ATOMIC UPDATE 
            phis(ig, FsrIdx(irv, j)) = phis(ig, FsrIdx(irv, j)) + PhidRay(ipol, ig, irv) 
            !$ACC END ATOMIC 
          ENDDO 
        ENDDO 
      ENDDO 
    ENDDO 
    PhiAngIn(:, mygb : myge, PhiAngOutSvIdx) = phiobd(:, mygb : myge) 
  ENDDO 

  !!!!!!!!!!!!!!!! Inlined Tracking Subroutine !!!!!!!!!!!!!!!! 

ENDDO  
!$ACC END PARALLEL 
!$ACC END DATA

I presume that some optimization options are causing errors given that it runs correctly in debug mode.

Possible, but without a reproducing example I unfortunately can’t tell. If you can send the whole program to PGI Customer Service (trs@pgroup.com) we can take a look.

Are there any default OpenACC options used in release mode which are not used in debug mode?

By default in release mode, high optimizations such as “-fast” are used. Though the exact options are user configurable so please review your project properties to see what options you have used.

  • Mat