But regarding question 2, I have a strange problem.
Please refer to the following code.
#define MAX_RAY_SEG 227
#define MAX_ASY_RAY 3
ATTRIBUTES(DEVICE) SUBROUTINE cuTrackRay(RayInfo, CoreInfo, cuConst, cuMOC, cuTrack, DcmpAsyRay, ljout, iz, gb, ge)
USE PARAM
USE TYPEDEF, ONLY : RayInfo_Type, CoreInfo_Type, CellRayInfo_Type, DcmpAsyRayInfo_Type
USE CUDATypeDef, ONLY : cuConst_Type, cuMOC_Type, cuTrack_Type
USE CUDAFOR
IMPLICIT NONE
TYPE(RayInfo_Type) :: RayInfo
TYPE(CoreInfo_Type) :: CoreInfo
TYPE(cuConst_Type) :: cuConst
TYPE(cuMOC_Type) :: cuMOC
TYPE(cuTrack_Type) :: cuTrack
TYPE(DcmpAsyRayInfo_Type) :: DcmpAsyRay
LOGICAL, VALUE :: ljout
INTEGER, VALUE :: iz, gb, ge
TYPE(CellRayInfo_Type), POINTER :: CellRay
INTEGER :: mp(2)
INTEGER :: iazi, irotray, iasyray, iceray, iray, irayseg, idir
INTEGER :: nAsyRay, nPinRay, nRaySeg, FsrIdxSt
INTEGER :: ipin, icel, ibcel, iasy, ireg, isurf1, isurf2
INTEGER :: iRaySegIdx
INTEGER :: j, l, ir
INTEGER :: jbeg, jend, jinc
INTEGER :: ipol, ig, irot, dir_tid, block_tid
INTEGER :: ExpAppIdx
REAL :: tau, phiobd, wt, wt2(4)
REAL :: PhiAngOut(MAX_RAY_SEG + 2, MAX_ASY_RAY)
REAL :: ExpApp(MAX_RAY_SEG, MAX_ASY_RAY)
REAL :: DelPhi(MAX_RAY_SEG, MAX_ASY_RAY)
INTEGER, SHARED :: CellRayIdxSt(cuConst%nMaxCellRay, cuConst%nMaxAsyRay, 2)
INTEGER, SHARED :: SurfIdx(cuConst%nMaxCellRay, cuConst%nMaxAsyRay, 2)
INTEGER, SHARED :: PinIdx(cuConst%nMaxCellRay, cuConst%nMaxAsyRay)
INTEGER, SHARED :: nTotRaySeg(cuConst%nMaxAsyRay)
INTEGER, SHARED :: nTotCellRay(cuConst%nMaxAsyRay)
mp = (/2, 1/)
ipol = threadIdx%x
ig = threadIdx%y + (gb - 1)
irot = threadIdx%z
dir_tid = threadIdx%x + (threadIdx%y - 1) * blockDim%x
block_tid = threadIdx%x + (threadIdx%y - 1) * blockDim%x + (threadIdx%z - 1) * blockDim%x * blockDim%y
nAsyRay = DcmpAsyRay%nAsyRay
iRotRay = DcmpAsyRay%iRotRay
iAsy = DcmpAsyRay%iAsy; iRay = DcmpAsyRay%iRay
DO j = 1, nAsyRay
iRaySegIdx = 0
iAsyRay = DcmpAsyRay%AsyRayList(j)
nPinRay = RayInfo%AsyRay(iAsyRay)%nCellRay
DO l = 1, nPinRay
ipin = RayInfo%AsyRay(iAsyRay)%PinIdx(l)
iceray = RayInfo%AsyRay(iAsyRay)%PinRayIdx(l)
ipin = CoreInfo%Asy(iAsy)%GlobalPinIdx(ipin)
icel = CoreInfo%Pin(ipin)%Cell(iz)
FsrIdxSt = CoreInfo%Pin(ipin)%FsrIdxSt
ibcel = CoreInfo%CellInfo(icel)%BaseCellStr
CellRay => CoreInfo%CellInfo(ibcel)%CellRay(iceray)
nRaySeg = CellRay%nSeg
IF (block_tid .EQ. 1) THEN
PinIdx(l, j) = ipin
CellRayIdxSt(l, j, 1) = iRaySegIdx + nRaySeg
CellRayIdxSt(l, j, 2) = iRaySegIdx + 1
SurfIdx(l, j, 1) = RayInfo%AsyRay(iAsyRay)%PinRaySurf(2, l)
SurfIdx(l, j, 2) = RayInfo%AsyRay(iAsyRay)%PinRaySurf(1, l)
nTotCellRay(j) = nPinRay
ENDIF
DO iRaySeg = 1, nRaySeg
ireg = FsrIdxSt + CellRay%LocalFsrIdx(iRaySeg) - 1
iRaySegIdx = iRaySegIdx + 1
IF (dir_tid .EQ. 1) cuTrack%FsrIdx(iRaySegIdx, j) = ireg
tau = - CellRay%LenSeg(iRaySeg) * cuMOC%xst(ig, ireg)
ExpAppIdx = max(INT(tau), -40000); ExpAppIdx = min(0, ExpAppIdx)
ExpApp(iRaySegIdx, j) = cuConst%EXPA(ipol, ExpAppIdx) * tau + cuConst%EXPB(ipol, ExpAppIdx)
ENDDO
ENDDO
IF (block_tid .EQ. 1) nTotRaySeg(j) = iRaySegIdx
ENDDO
CALL syncThreads()
jbeg = 1; jend = nAsyRay; jinc = 1
IF (irot .EQ. 2) THEN
jbeg = nAsyRay; jend = 1; jinc = -1
ENDIF
IF (DcmpAsyRay%lRotRayBeg(irot)) THEN
phiobd = cuMOC%CorePhiAngIn(ipol, ig, RayInfo%PhiAngInSvIdx(iRotRay, irot))
ELSE
phiobd = cuMOC%AsyPhiAngIn(ipol, ig, DcmpAsyRay%prevRay(irot), DcmpAsyRay%prevAsy(irot), irot)
ENDIF
DO j = jbeg, jend, jinc
idir = DcmpAsyRay%DirList(j); iazi = DcmpAsyRay%AziList(j)
wt = cuConst%wtang(ipol, iazi)
wt2 = cuConst%wtang2(ipol, iazi, :)
nRaySeg = nTotRaySeg(j)
IF (irot .EQ. 2) idir = mp(idir)
IF (idir .EQ. 1) THEN
PhiAngOut(1, j) = phiobd
DO ir = 1, nRaySeg
ireg = cuTrack%FsrIdx(ir, j)
DelPhi(ir, j) = (PhiAngOut(ir, j) - cuMOC%src(ig, ireg)) * ExpApp(ir, j)
PhiAngOut(ir + 1, j) = PhiAngOut(ir, j) - DelPhi(ir, j)
ENDDO
phiobd = PhiAngOut(nRaySeg + 1, j)
ELSE
PhiAngOut(nRaySeg + 2, j) = phiobd
DO ir = nRaySeg, 1, -1
ireg = cuTrack%FsrIdx(ir, j)
DelPhi(ir, j) = (PhiAngOut(ir + 2, j) - cuMOC%src(ig, ireg)) * ExpApp(ir, j)
PhiAngOut(ir + 1, j) = PhiAngOut(ir + 2, j) - DelPhi(ir, j)
ENDDO
phiobd = PhiAngOut(2, j)
ENDIF
ENDDO
IF (DcmpAsyRay%lRotRayEnd(irot)) THEN
cuMOC%CorePhiAngOut(ipol, ig, RayInfo%PhiAngOutSvIdx(iRotRay, irot)) = phiobd
ELSE
cuMOC%AsyPhiAngOut(ipol, ig, iRay, iAsy, irot) = phiobd
ENDIF
DO j = jbeg, jend, jinc
iazi = DcmpAsyRay%AziList(j)
wt = cuConst%wtang(ipol, iazi)
DO ir = 1, nTotRaySeg(j)
ireg = cuTrack%FsrIdx(ir, j)
!$ACC ATOMIC
cuMOC%phis(ig, ireg) = cuMOC%phis(ig, ireg) + wt * DelPhi(ir, j)
ENDDO
IF (lJout) THEN
idir = DcmpAsyRay%DirList(j)
wt2 = cuConst%wtang2(ipol, iazi, :)
IF (irot .EQ. 2) idir = mp(idir)
IF (idir .EQ. 1) THEN
DO ir = 1, nTotCellRay(j)
icel = PinIdx(ir, j); isurf1 = SurfIdx(ir, j, 1); isurf2 = SurfIdx(ir, j, 2)
!$ACC ATOMIC
cuMOC%Jout(2, ig, isurf1, icel) = cuMOC%Jout(2, ig, isurf1, icel) + wt * PhiAngOut(CellRayIdxSt(ir, j, 1) + 1, j)
!$ACC ATOMIC
cuMOC%Jout(3, ig, isurf1, icel) = cuMOC%Jout(3, ig, isurf1, icel) + wt2(isurf1) * PhiAngOut(CellRayIdxSt(ir, j, 1) + 1, j)
!$ACC ATOMIC
cuMOC%Jout(1, ig, isurf2, icel) = cuMOC%Jout(1, ig, isurf2, icel) + wt * PhiAngOut(CellRayIdxSt(ir, j, 2), j)
!$ACC ATOMIC
cuMOC%Jout(3, ig, isurf2, icel) = cuMOC%Jout(3, ig, isurf2, icel) + wt2(isurf2) * PhiAngOut(CellRayIdxSt(ir, j, 2), j)
ENDDO
ELSE
DO ir = 1, nTotCellRay(j)
icel = PinIdx(ir, j); isurf1 = SurfIdx(ir, j, 1); isurf2 = SurfIdx(ir, j, 2)
!$ACC ATOMIC
cuMOC%Jout(2, ig, isurf2, icel) = cuMOC%Jout(2, ig, isurf2, icel) + wt * PhiAngOut(CellRayIdxSt(ir, j, 2) + 1, j)
!$ACC ATOMIC
cuMOC%Jout(3, ig, isurf2, icel) = cuMOC%Jout(3, ig, isurf2, icel) + wt2(isurf2) * PhiAngOut(CellRayIdxSt(ir, j, 2) + 1, j)
!$ACC ATOMIC
cuMOC%Jout(1, ig, isurf1, icel) = cuMOC%Jout(1, ig, isurf1, icel) + wt * PhiAngOut(CellRayIdxSt(ir, j, 1) + 2, j)
!$ACC ATOMIC
cuMOC%Jout(3, ig, isurf1, icel) = cuMOC%Jout(3, ig, isurf1, icel) + wt2(isurf1) * PhiAngOut(CellRayIdxSt(ir, j, 1) + 2, j)
ENDDO
ENDIF
ENDIF
ENDDO
END SUBROUTINE
This code runs fine.
But if I change
REAL :: PhiAngOut(MAX_RAY_SEG + 2, MAX_ASY_RAY)
REAL :: ExpApp(MAX_RAY_SEG, MAX_ASY_RAY)
REAL :: DelPhi(MAX_RAY_SEG, MAX_ASY_RAY)
to automatic arrays,
REAL :: PhiAngOut(cuConst%nMaxRaySeg + 2, cuConst%nMaxAsyRay)
REAL :: ExpApp(cuConst%nMaxRaySeg, cuConst%nMaxAsyRay)
REAL :: DelPhi(cuConst%nMaxRaySeg, cuConst%nMaxAsyRay)
segmentation fault occurs. (Error code 700)
cuConst%nMaxRaySeg, cuConst%nMaxAsyRay and MAX_RAY_SEG, MAX_ASY_RAY are exactly same.
This is why I thought that there might be some other ways to define thread-local automatic arrays.
What could be the problem?