Hello,
I found what I believe is a bug in nvfortran 21.2. I have a Fortran code with an outermost gang loop, inside which worker subroutines (!$acc routine worker) are called. In one of these subroutines, the compiler apparently fails to privatize the temporary arrays, even though they are declared private on the worker loop, which results in a race condition and wrong results. When I inline this subroutine manually, the code works as intended.
I will provide the subroutine below. The input-output arguments are declared as private for the outer gang loop in the higher-level code. Let me know if you need more information!
!> For each g-point column igpt, independently of the others:
!>   1. walk levels 1 -> nlay to build the direct-beam sources and to
!>      propagate flux_dn_dir to level nlay+1;
!>   2. walk levels nlay -> 1 to build albedo, denom and source
!>      (alpha, beta and G in SH08);
!>   3. walk levels 2 -> nlay+1 to fill flux_dn and flux_up.
!>
!> Arguments
!>   ngpt, nlay      extents of the first and second array dimensions
!>   top_at_1        not referenced in this routine
!>                   NOTE(review): the loops always start from level 1, where
!>                   the incident flux is read; presumably the caller
!>                   guarantees that ordering -- confirm.
!>   Rdif, Tdif      (ngpt,nlay) coefficients used in the albedo/flux recurrences
!>   Rdir, Tdir      (ngpt,nlay) coefficients multiplying the direct-beam flux
!>   Tnoscat         (ngpt,nlay) factor carrying flux_dn_dir from one level to the next
!>   sfc_albedo      (ngpt) multiplies the direct-beam flux at level nlay+1
!>   sfc_albedo_dif  (ngpt) bottom boundary value of the albedo recurrence
!>   flux_up         (ngpt,nlay+1) fully assigned on exit
!>   flux_dn,
!>   flux_dn_dir     (ngpt,nlay+1) level 1 is read as input; levels 2..nlay+1
!>                   are overwritten
pure subroutine sw_source_adding_2str_nocol(ngpt, nlay, top_at_1,            &
                                            Rdif, Tdif, Rdir, Tdir, Tnoscat, &
                                            sfc_albedo, sfc_albedo_dif,      &
                                            flux_up, flux_dn, flux_dn_dir)
  !$acc routine worker
  integer,                           intent(in   ) :: ngpt, nlay
  logical(wl),                       intent(in   ) :: top_at_1
  real(wp), dimension(ngpt,nlay),    intent(in   ) :: Rdif, Tdif, Rdir, Tdir, Tnoscat
  real(wp), dimension(ngpt),         intent(in   ) :: sfc_albedo, &   ! applied to the direct beam
                                                      sfc_albedo_dif  ! applied to diffuse radiation
  real(wp), dimension(ngpt, nlay+1), intent(  out) :: flux_up
  real(wp), dimension(ngpt, nlay+1), intent(inout) :: flux_dn, flux_dn_dir ! Direct beam flux
                                       ! intent(inout) because top layer includes incident flux

  integer :: igpt, ilev
  ! ---------------------------------
  ! Per-column work arrays. Every igpt iteration overwrites all of them, so
  ! each iteration needs its own copy (hence the private clause below).
  real(wp), dimension(nlay+1) :: albedo    ! reflectivity to diffuse radiation below this level
                                           ! alpha in SH08
  real(wp), dimension(nlay  ) :: denom     ! beta in SH08
  real(wp), dimension(nlay+1) :: source    ! source of diffuse upwelling radiation from emission or
                                           ! scattering of direct beam. G in SH08
  real(wp), dimension(nlay  ) :: source_dn, source_up
  real(wp)                    :: source_sfc
  ! ---------------------------------

  ! Big parallelizable loop over the first dimension of the input-output arrays.
  ! This is the loop reported to race (wrong results) under nvfortran 21.2
  ! when the routine is called rather than inlined.
  !$acc loop worker vector private(albedo,denom,source,source_dn,source_up,source_sfc,ilev)
  do igpt = 1, ngpt

    ! Direct-beam sources. Sequential: level ilev+1 of flux_dn_dir is computed
    ! from level ilev.
    !$acc loop seq
    do ilev = 1, nlay
      source_up(ilev)          = Rdir(igpt,ilev)    * flux_dn_dir(igpt,ilev)
      source_dn(ilev)          = Tdir(igpt,ilev)    * flux_dn_dir(igpt,ilev)
      flux_dn_dir(igpt,ilev+1) = Tnoscat(igpt,ilev) * flux_dn_dir(igpt,ilev)
    end do

    ! Boundary values at level nlay+1
    source_sfc   = flux_dn_dir(igpt,nlay+1)*sfc_albedo(igpt)
    ilev         = nlay + 1
    albedo(ilev) = sfc_albedo_dif(igpt)
    source(ilev) = source_sfc

    ! Upward recurrence. Sequential: level ilev reads albedo and source at ilev+1.
    !$acc loop seq
    do ilev = nlay, 1, -1
      denom(ilev)  = 1._wp/(1._wp - Rdif(igpt,ilev)*albedo(ilev+1))                 ! Eq 10
      albedo(ilev) = Rdif(igpt,ilev) + &
                     Tdif(igpt,ilev)*Tdif(igpt,ilev) * albedo(ilev+1) * denom(ilev) ! Equation 9
      source(ilev) = source_up(ilev) + &
                     Tdif(igpt,ilev) * denom(ilev) *  &
                     (source(ilev+1) + albedo(ilev+1)*source_dn(ilev))
    end do

    ! Level 1: flux_dn(igpt,1) is an input
    ilev = 1
    flux_up(igpt,ilev) = flux_dn(igpt,ilev) * albedo(ilev) + &  ! ... reflection of incident diffuse and
                         source(ilev)                           !     emission from below

    ! Downward sweep. Sequential: flux_dn at level ilev reads flux_dn at ilev-1.
    ! Marked seq explicitly, like the two level loops above.
    !$acc loop seq
    do ilev = 2, nlay+1
      flux_dn(igpt,ilev) = (Tdif(igpt,ilev-1)*flux_dn(igpt,ilev-1) + &  ! Equation 13
                            Rdif(igpt,ilev-1)*source(ilev) +         &
                            source_dn(ilev-1)) * denom(ilev-1)
      flux_up(igpt,ilev) = flux_dn(igpt,ilev) * albedo(ilev) + &        ! Equation 12
                           source(ilev)
    end do

  end do
end subroutine sw_source_adding_2str_nocol