Hi,
I am new to OpenACC. I am trying to parallelize a large Fortran 90 code. One of the code block in my program has these nested DO loops. I was trying to compute the array named ‘Fflux_iph’ through the following code using collapse directive. So I am using the collapse directive. The code has compiled fine, and is running fine too. But the ‘Fflux_iph’ is eventually resulting in NaN value. Is there any problem with the following code? The same code is running fine on a serial run.
compiler: pgfortran
flags: -acc -O3 -ta=tesla -Minfo=accel
Input variables: nblocks = 1, nprims = 5, NI(1) = 300, NJ(1) = 300, NK(1) = 300
Thanks!
double precision, dimension(0:NImax, NJmax, NKmax, nblocks, nconserv) :: Fflux_iph
double precision :: Rho_L, Rho_R, Ux_L, Ux_R, Uy_L, Uy_R, Uz_L, Uz_R, P_L, P_R, C_L, C_R, E_L, E_R, h_L, h_R, E_local
double precision :: U_contra_L, U_contra_R, S_L, S_R, S
double precision, dimension(nconserv) :: F_L, F_R
double precision :: sqrt_rho_L, sqrt_rho_R, Rho, divisor, u_average, v_average
double precision :: w_average, h_average, uvw_average,ucontra_ave, C_average
DO nbl = 1,nblocks
!$acc parallel loop collapse(3)
DO k = 1,NK(nbl)
DO j = 1,NJ(nbl)
DO i = 0,NI(nbl)
!Local variables
Rho_L = Qp_iphL(i,j,k,nbl,1)
Rho_R = Qp_iphR(i,j,k,nbl,1)
Ux_L = Qp_iphL(i,j,k,nbl,2)
Ux_R = Qp_iphR(i,j,k,nbl,2)
Uy_L = Qp_iphL(i,j,k,nbl,3)
Uy_R = Qp_iphR(i,j,k,nbl,3)
Uz_L = Qp_iphL(i,j,k,nbl,4)
Uz_R = Qp_iphR(i,j,k,nbl,4)
P_L = Qp_iphL(i,j,k,nbl,5)
P_R = Qp_iphR(i,j,k,nbl,5)
C_L = Sqrt(Gamma*P_L/Rho_L*(Ix_iph(i,j,k,nbl)**2+Iy_iph(i,j,k,nbl)**2+Iz_iph(i,j,k,nbl)**2))
C_R = Sqrt(Gamma*P_R/Rho_R*(Ix_iph(i,j,k,nbl)**2+Iy_iph(i,j,k,nbl)**2+Iz_iph(i,j,k,nbl)**2))
E_L = P_L/(Gamma-1.d0) + Rho_L*(Ux_L**2+Uy_L**2+Uz_L**2)/2.d0
E_R = P_R/(Gamma-1.d0) + Rho_R*(Ux_R**2+Uy_R**2+Uz_R**2)/2.d0
h_L = (E_L + P_L)/Rho_L
h_R = (E_R + P_R)/Rho_R
U_contra_L = Ux_L*Ix_iph(i,j,k,nbl) + Uy_L*Iy_iph(i,j,k,nbl) + Uz_L*Iz_iph(i,j,k,nbl)
U_contra_R = Ux_R*Ix_iph(i,j,k,nbl) + Uy_R*Iy_iph(i,j,k,nbl) + Uz_R*Iz_iph(i,j,k,nbl)
! Roe averaging
sqrt_rho_L = sqrt(Rho_L)
sqrt_rho_R = sqrt(Rho_R)
Rho = sqrt(Rho_R/Rho_L)*Rho_L
divisor = 1.d0/(sqrt_rho_R+sqrt_rho_L)
h_average = ((h_L*sqrt_rho_L) + (h_R*sqrt_rho_R))*divisor
u_average = ((Ux_L*sqrt_rho_L) + (Ux_R*sqrt_rho_R))*divisor
v_average = ((Uy_L*sqrt_rho_L) + (Uy_R*sqrt_rho_R))*divisor
w_average = ((Uz_L*sqrt_rho_L) + (Uz_R*sqrt_rho_R))*divisor
ucontra_ave = u_average*Ix_iph(i,j,k,nbl) + v_average*Iy_iph(i,j,k,nbl) + w_average*Iz_iph(i,j,k,nbl)
uvw_average = 0.5d0 * (u_average**2.d0 + v_average**2.d0 + w_average**2.d0)
C_average = sqrt((gamma-1.d0)*(h_average - uvw_average))*SQRT(Ix_iph(i,j,k,nbl)**2+Iy_iph(i,j,k,nbl)**2+Iz_iph(i,j,k,nbl)**2)
S = ABS(ucontra_ave + C_average)
!Step3: Estimate fluxes
F_L(1) = (rho_L*U_contra_L) /Jac_iph(i,j,k,nbl)
F_L(2) = (rho_L*U_contra_L*Ux_L + P_L*Ix_iph(i,j,k,nbl)) /Jac_iph(i,j,k,nbl)
F_L(3) = (rho_L*U_contra_L*Uy_L + P_L*Iy_iph(i,j,k,nbl)) /Jac_iph(i,j,k,nbl)
F_L(4) = (rho_L*U_contra_L*Uz_L + P_L*Iz_iph(i,j,k,nbl)) /Jac_iph(i,j,k,nbl)
F_L(5) = (E_L + P_L) * U_contra_L /Jac_iph(i,j,k,nbl)
F_R(1) = (rho_R*U_contra_R) /Jac_iph(i,j,k,nbl)
F_R(2) = (rho_R*U_contra_R*Ux_R + P_R*Ix_iph(i,j,k,nbl)) /Jac_iph(i,j,k,nbl)
F_R(3) = (rho_R*U_contra_R*Uy_R + P_R*Iy_iph(i,j,k,nbl)) /Jac_iph(i,j,k,nbl)
F_R(4) = (rho_R*U_contra_R*Uz_R + P_R*Iz_iph(i,j,k,nbl)) /Jac_iph(i,j,k,nbl)
F_R(5) = (E_R + P_R) * U_contra_R /Jac_iph(i,j,k,nbl)
Fflux_iph(i,j,k,nbl,1) = 0.5d0*(F_L(1)+F_R(1)) - 0.5d0*S*(Rho_R - Rho_L)/Jac_iph(i,j,k,nbl)
Fflux_iph(i,j,k,nbl,2) = 0.5d0*(F_L(2)+F_R(2)) - 0.5d0*S*(Rho_R*Ux_R - Rho_L*Ux_L)/Jac_iph(i,j,k,nbl)
Fflux_iph(i,j,k,nbl,3) = 0.5d0*(F_L(3)+F_R(3)) - 0.5d0*S*(Rho_R*Uy_R - Rho_L*Uy_L)/Jac_iph(i,j,k,nbl)
Fflux_iph(i,j,k,nbl,4) = (0.5d0*(F_L(4)+F_R(4)) - 0.5d0*S*(Rho_R*Uz_R - Rho_L*Uz_L)/Jac_iph(i,j,k,nbl))*(1-f2D)
Fflux_iph(i,j,k,nbl,5) = 0.5d0*(F_L(5)+F_R(5)) - 0.5d0*S*(E_R - E_L)/Jac_iph(i,j,k,nbl)
ENDDO
ENDDO
ENDDO
!$acc end parallel loop
ENDDO```