I am trying to call a subroutine (named MP5_INTERPOLATE here) belonging to a module from a OpenACC compute region. while the code is running fine sequentially under “!$acc loop seq” directives, the program has returned the following error during runtime when parallel directives were used. I have privitized the variables properly and took care of using “!$acc routine seq” directive in module subroutine too, what can be the possible reason for this error?
Failing in Thread:1
call to cuStreamSynchronize returned error 700: Illegal address during kernel execution
This is the subroutine from which I am calling the module subroutine named “MP5_INTERPOLATE”
SUBROUTINE WENO5_FACE_INTERPOLATION_WITH_PRIMS_CHARS()
use declare_variables
use MP5_subroutines
implicit none
integer :: II
double precision, dimension(nprims) :: W_iph, W_imh
double precision, dimension(-2:3,nprims) :: Wp
double precision, dimension(nprims,nprims) :: L_Eig, R_Eig
double precision :: p, c, sqrt_rho, divisor, rho
!$acc parallel loop gang collapse(3) default(present)
DO nbl = 1,nblocks
DO k = 1,NKmax
DO j = 1,NJmax
!$acc loop vector private(L_Eig,R_Eig,Wp,W_iph,W_imh,n_prim)
DO i = 0,NImax
if (k.le.NK(nbl).and.j.le.NJ(nbl).and.i.le.NI(nbl)) then
! Roe-averaged 'Rho' and 'c' on i+1/2 face
sqrt_rho = sqrt(Qp(i+1,j,k,nbl,1)/Qp(i,j,k,nbl,1))
rho = sqrt_rho*Qp(i,j,k,nbl,1)
divisor = 1.d0/(sqrt_rho+1.d0)
p = (Qp(i,j,k,nbl,5) + (Qp(i+1,j,k,nbl,5)*sqrt_rho))*divisor
c = sqrt(gamma*p/rho)
! Left and right Eigen vectors on i+1/2 face
!--------------------------------------------
L_Eig(1,1) = 0.d0 ; R_Eig(1,1) = 0.5d0/c**2.d0
L_Eig(2,1) = Ix_c(i,j,k,nbl)*c**2.d0 ; R_Eig(2,1) = -0.5d0*Ix_c(i,j,k,nbl)/rho/c
L_Eig(3,1) = Iz_c(i,j,k,nbl)*c**2.d0 ; R_Eig(3,1) = -0.5d0*Iy_c(i,j,k,nbl)/rho/c
L_Eig(4,1) = -Iy_c(i,j,k,nbl)*c**2.d0 ; R_Eig(4,1) = -0.5d0*Iz_c(i,j,k,nbl)/rho/c
L_Eig(5,1) = 0.d0 ; R_Eig(5,1) = 0.5d0
;
L_Eig(1,2) = -Ix_c(i,j,k,nbl)*rho*c ; R_Eig(1,2) = Ix_c(i,j,k,nbl)/c**2.d0
L_Eig(2,2) = 0.d0 ; R_Eig(2,2) = 0.d0
L_Eig(3,2) = -Iy_c(i,j,k,nbl) ; R_Eig(3,2) = -Iz_c(i,j,k,nbl)
L_Eig(4,2) = -Iz_c(i,j,k,nbl) ; R_Eig(4,2) = Iy_c(i,j,k,nbl)
L_Eig(5,2) = Ix_c(i,j,k,nbl)*rho*c ; R_Eig(5,2) = 0.d0
;
L_Eig(1,3) = -Iy_c(i,j,k,nbl)*rho*c ; R_Eig(1,3) = Iz_c(i,j,k,nbl)/c**2.d0
L_Eig(2,3) = -Iz_c(i,j,k,nbl) ; R_Eig(2,3) = -Iy_c(i,j,k,nbl)
L_Eig(3,3) = Ix_c(i,j,k,nbl) ; R_Eig(3,3) = Ix_c(i,j,k,nbl)
L_Eig(4,3) = 0.d0 ; R_Eig(4,3) = 0.d0
L_Eig(5,3) = Iy_c(i,j,k,nbl)*rho*c ; R_Eig(5,3) = 0.d0
;
L_Eig(1,4) = -Iz_c(i,j,k,nbl)*rho*c ; R_Eig(1,4) = -Iy_c(i,j,k,nbl)/c**2.d0
L_Eig(2,4) = Iy_c(i,j,k,nbl) ; R_Eig(2,4) = -Iz_c(i,j,k,nbl)
L_Eig(3,4) = 0.d0 ; R_Eig(3,4) = 0.d0
L_Eig(4,4) = Ix_c(i,j,k,nbl) ; R_Eig(4,4) = Ix_c(i,j,k,nbl)
L_Eig(5,4) = Iz_c(i,j,k,nbl)*rho*c ; R_Eig(5,4) = 0.d0
;
L_Eig(1,5) = 1.d0 ; R_Eig(1,5) = 0.5d0/c**2.d0
L_Eig(2,5) = -Ix_c(i,j,k,nbl) ; R_Eig(2,5) = 0.5d0*Ix_c(i,j,k,nbl)/rho/c
L_Eig(3,5) = -Iz_c(i,j,k,nbl) ; R_Eig(3,5) = 0.5d0*Iy_c(i,j,k,nbl)/rho/c
L_Eig(4,5) = Iy_c(i,j,k,nbl) ; R_Eig(4,5) = 0.5d0*Iz_c(i,j,k,nbl)/rho/c
L_Eig(5,5) = 1.d0 ; R_Eig(5,5) = 0.5d0
!--------------------------------------------------------------------------------------------
! Use these eigen vectors at i, to convert 'Qp' at all II = -2,3 to 'Wp'
!$acc loop seq
DO II=-2,3
!$acc loop seq
DO n_prim = 1,nprims
Wp(II,n_prim) = L_Eig(n_prim,1)*Qp(i+II,j,k,nbl,1) +&
L_Eig(n_prim,2)*Qp(i+II,j,k,nbl,2) +&
L_Eig(n_prim,3)*Qp(i+II,j,k,nbl,3) +&
L_Eig(n_prim,4)*Qp(i+II,j,k,nbl,4) +&
L_Eig(n_prim,5)*Qp(i+II,j,k,nbl,5)
ENDDO
ENDDO
!========================================================
Call MP5_INTERPOLATE(Wp, W_iph, W_imh, nprims)
!========================================================
!$acc loop seq
DO n_prim = 1,nprims
Qp_iphL(i,j,k,nbl,n_prim) = R_Eig(n_prim,1)*W_iph(1) + &
R_Eig(n_prim,2)*W_iph(2) + &
R_Eig(n_prim,3)*W_iph(3) + &
R_Eig(n_prim,4)*W_iph(4) + &
R_Eig(n_prim,5)*W_iph(5)
Qp_iphR(i,j,k,nbl,n_prim) = R_Eig(n_prim,1)*W_imh(1) + &
R_Eig(n_prim,2)*W_imh(2) + &
R_Eig(n_prim,3)*W_imh(3) + &
R_Eig(n_prim,4)*W_imh(4) + &
R_Eig(n_prim,5)*W_imh(5)
ENDDO
endif
ENDDO
ENDDO
ENDDO
ENDDO
!$acc parallel loop gang collapse(3) default(present)
DO nbl = 1,nblocks
DO k = 1,NKmax
DO j = 0,NJmax
!$acc loop vector private(L_Eig,R_Eig,Wp,W_iph,W_imh,n_prim)
DO i = 1,NImax
if (k.le.NK(nbl).and.j.le.NJ(nbl).and.i.le.NI(nbl)) then
! Roe-averaged 'Rho' and 'c' on j+1/2 face
sqrt_rho =sqrt(Qp(i,j+1,k,nbl,1)/Qp(i,j,k,nbl,1))
rho =sqrt_rho*Qp(i,j,k,nbl,1)
divisor =1.0d0/(sqrt_rho+1.0d0)
p = (Qp(i,j,k,nbl,5) + (Qp(i,j+1,k,nbl,5)*sqrt_rho))*divisor
c = sqrt(gamma*p/rho)
! Left and right Eigen vectors on j+1/2 face
!--------------------------------
L_Eig(1,1) = 0.d0 ; R_Eig(1,1) = 0.5d0/c**2.d0
L_Eig(2,1) = Jx_c(i,j,k,nbl)*c**2.d0 ; R_Eig(2,1) = -0.5d0*Jx_c(i,j,k,nbl)/rho/c
L_Eig(3,1) = Jz_c(i,j,k,nbl)*c**2.d0 ; R_Eig(3,1) = -0.5d0*Jy_c(i,j,k,nbl)/rho/c
L_Eig(4,1) = -Jy_c(i,j,k,nbl)*c**2.d0 ; R_Eig(4,1) = -0.5d0*Jz_c(i,j,k,nbl)/rho/c
L_Eig(5,1) = 0.d0 ; R_Eig(5,1) = 0.5d0
;
L_Eig(1,2) = -Jx_c(i,j,k,nbl)*rho*c ; R_Eig(1,2) = Jx_c(i,j,k,nbl)/c**2.d0
L_Eig(2,2) = 0.d0 ; R_Eig(2,2) = 0.d0
L_Eig(3,2) = -Jy_c(i,j,k,nbl) ; R_Eig(3,2) = -Jz_c(i,j,k,nbl)
L_Eig(4,2) = -Jz_c(i,j,k,nbl) ; R_Eig(4,2) = Jy_c(i,j,k,nbl)
L_Eig(5,2) = Jx_c(i,j,k,nbl)*rho*c ; R_Eig(5,2) = 0.d0
;
L_Eig(1,3) = -Jy_c(i,j,k,nbl)*rho*c ; R_Eig(1,3) = Jz_c(i,j,k,nbl)/c**2.d0
L_Eig(2,3) = -Jz_c(i,j,k,nbl) ; R_Eig(2,3) = -Jy_c(i,j,k,nbl)
L_Eig(3,3) = Jx_c(i,j,k,nbl) ; R_Eig(3,3) = Jx_c(i,j,k,nbl)
L_Eig(4,3) = 0.d0 ; R_Eig(4,3) = 0.d0
L_Eig(5,3) = Jy_c(i,j,k,nbl)*rho*c ; R_Eig(5,3) = 0.d0
;
L_Eig(1,4) = -Jz_c(i,j,k,nbl)*rho*c ; R_Eig(1,4) = -Jy_c(i,j,k,nbl)/c**2.d0
L_Eig(2,4) = Jy_c(i,j,k,nbl) ; R_Eig(2,4) = -Jz_c(i,j,k,nbl)
L_Eig(3,4) = 0.d0 ; R_Eig(3,4) = 0.d0
L_Eig(4,4) = Jx_c(i,j,k,nbl) ; R_Eig(4,4) = Jx_c(i,j,k,nbl)
L_Eig(5,4) = Jz_c(i,j,k,nbl)*rho*c ; R_Eig(5,4) = 0.d0
;
L_Eig(1,5) = 1.d0 ; R_Eig(1,5) = 0.5d0/c**2.d0
L_Eig(2,5) = -Jx_c(i,j,k,nbl) ; R_Eig(2,5) = 0.5d0*Jx_c(i,j,k,nbl)/rho/c
L_Eig(3,5) = -Jz_c(i,j,k,nbl) ; R_Eig(3,5) = 0.5d0*Jy_c(i,j,k,nbl)/rho/c
L_Eig(4,5) = Jy_c(i,j,k,nbl) ; R_Eig(4,5) = 0.5d0*Jz_c(i,j,k,nbl)/rho/c
L_Eig(5,5) = 1.d0 ; R_Eig(5,5) = 0.5d0
!------------------------------------------------------------------------------------------
! Use these eigen vectors at i, to convert 'Qp' at all II = -2,3 to 'Wp'
!$acc loop seq
DO II=-2,3
!$acc loop seq
DO n_prim = 1,nprims
Wp(II,n_prim) = L_Eig(n_prim,1)*Qp(i,j+II,k,nbl,1) +&
L_Eig(n_prim,2)*Qp(i,j+II,k,nbl,2) +&
L_Eig(n_prim,3)*Qp(i,j+II,k,nbl,3) +&
L_Eig(n_prim,4)*Qp(i,j+II,k,nbl,4) +&
L_Eig(n_prim,5)*Qp(i,j+II,k,nbl,5)
ENDDO
ENDDO
!========================================================
Call MP5_INTERPOLATE(Wp, W_iph, W_imh, nprims)
!========================================================
!$acc loop seq
DO n_prim = 1,nprims
Qp_jphL(i,j,k,nbl,n_prim) = R_Eig(n_prim,1)*W_iph(1) + &
R_Eig(n_prim,2)*W_iph(2) + &
R_Eig(n_prim,3)*W_iph(3) + &
R_Eig(n_prim,4)*W_iph(4) + &
R_Eig(n_prim,5)*W_iph(5)
Qp_jphR(i,j,k,nbl,n_prim) = R_Eig(n_prim,1)*W_imh(1) + &
R_Eig(n_prim,2)*W_imh(2) + &
R_Eig(n_prim,3)*W_imh(3) + &
R_Eig(n_prim,4)*W_imh(4) + &
R_Eig(n_prim,5)*W_imh(5)
ENDDO
endif
ENDDO
ENDDO
ENDDO
ENDDO
END
This is the Module in which MP5_INTERPOLATE subroutine is present:
MODULE MP5_subroutines
contains
SUBROUTINE MP5_INTERPOLATE(Wp, W_iph, W_imh, nprims)
!$acc routine seq
! MP5 by Suresh and Hyunh Ref- https://doi.org/10.1006/JCPH.1997.5745
! Task: Give a small stencil of Characteristic variables Wp(i-2,i+3),
! Subroutine interpolates data to right face of 'i' cell (PHI_iph(i)) and left face of 'i+1' cell (Phi_imh(i))
use some_functions
implicit none
Integer :: n_prim, nprims, II
double precision, dimension(nprims) :: W_iph, W_imh
double precision, dimension(-2:3,nprims) :: Wp
double precision :: PHI_mp, D_iph, D_imh, PHI_UL, PHI_AV, PHI_MD, PHI_LC, PHI_min, PHI_max
double precision :: DI_minus, DI_zero, DI_plus
double precision, parameter :: eps = 1d-40, alp = 4.d0, B2 = 4.d0/3.d0
DO n_prim = 1,nprims
II = 0
! This is the interpolation polynomial
! Taylor series based interpolation, Legendre polynomial interpolation, Newton polynomial interpolation
! any thing can be used
W_iph(n_prim) = (2.d0*Wp(II-2,n_prim) - 13.d0*Wp(II-1,n_prim) &
+ 47.d0*Wp(II,n_prim) + 27.d0*Wp(II+1,n_prim) - 3.d0*Wp(II+2,n_prim))/60.d0
PHI_mp = Wp(II,n_prim) + MINMOD2(Wp(II+1,n_prim)-Wp(II,n_prim), alp*(Wp(II,n_prim)-Wp(II-1,n_prim)))
! Sometimes the interpolation can create new maxima/minima, so check and correct
! Maintain TVD
IF ((W_iph(n_prim) - Wp(II,n_prim))*(W_iph(n_prim) - PHI_mp) >= eps) THEN
Di_minus = Wp(II-2,n_prim) - 2.d0*Wp(II-1,n_prim) + Wp(II,n_prim)
Di_zero = Wp(II-1,n_prim) - 2.d0*Wp(II,n_prim) + Wp(II+1,n_prim)
DI_plus = Wp(II,n_prim) - 2.d0*Wp(II+1,n_prim) + Wp(II+2,n_prim)
D_iph = MINMOD4(4.d0*DI_zero - DI_plus , 4.d0*Di_plus - DI_zero, DI_zero, DI_plus)
D_imh = MINMOD4(4.d0*Di_zero - DI_minus, 4.d0*Di_minus - DI_zero, DI_zero, DI_minus)
PHI_UL = Wp(II,n_prim) + alp*(Wp(II,n_prim) - Wp(II-1,n_prim))
PHI_AV = 0.5d0*(Wp(II,n_prim) + Wp(II+1,n_prim))
PHI_MD = PHI_AV - 0.5d0*D_iph
PHI_LC = Wp(II,n_prim) + 0.5d0*(Wp(II,n_prim)-Wp(II-1,n_prim)) + B2*D_imh
PHI_min = MAX(MIN(Wp(II,n_prim), Wp(II+1,n_prim), PHI_MD), MIN(Wp(II,n_prim), PHI_UL, PHI_LC))
PHI_max = MIN(MAX(Wp(II,n_prim), Wp(II+1,n_prim), PHI_MD), MAX(Wp(II,n_prim), PHI_UL, PHI_LC))
W_iph(n_prim) = W_iph(n_prim) + MINMOD2(PHI_min-W_iph(n_prim), PHI_max-W_iph(n_prim))
ENDIF
ENDDO
!======================================================================================================
DO n_prim = 1,nprims
II = 1
W_imh(n_prim) = (2.d0*Wp(II+2,n_prim) - 13.d0*Wp(II+1,n_prim) &
+ 47.d0*Wp(II,n_prim) + 27.d0*Wp(II-1,n_prim) - 3.d0*Wp(II-2,n_prim))/60.d0
PHI_mp = Wp(II,n_prim) + MINMOD2(Wp(II-1,n_prim) - Wp(II,n_prim), alp*(Wp(II,n_prim) - Wp(II+1,n_prim)))
IF ((W_imh(n_prim) - Wp(II,n_prim))*(W_imh(n_prim) - PHI_mp) >= eps) THEN
DI_plus = Wp(II+2,n_prim) - 2.d0*Wp(II+1,n_prim) + Wp(II,n_prim)
DI_zero = Wp(II+1,n_prim) - 2.d0*Wp(II,n_prim) + Wp(II-1,n_prim)
DI_minus = Wp(II,n_prim) - 2.d0*Wp(II-1,n_prim) + Wp(II-2,n_prim)
D_iph = MINMOD4(4.d0*DI_zero - DI_plus, 4.d0*Di_plus - DI_zero, DI_zero, DI_plus)
D_imh = MINMOD4(4.d0*DI_zero - DI_minus, 4.d0*Di_minus - DI_zero, DI_zero, DI_minus)
PHI_UL = Wp(II,n_prim) + alp*(Wp(II,n_prim) - Wp(II+1,n_prim))
PHI_AV = 0.5d0*(Wp(II,n_prim) + Wp(II-1,n_prim))
PHI_MD = PHI_AV - 0.5d0*D_imh
PHI_LC = Wp(II,n_prim) + 0.5d0*(Wp(II,n_prim)-Wp(II+1,n_prim)) + B2*D_iph
PHI_min = MAX(MIN(Wp(II,n_prim), Wp(II-1,n_prim), PHI_MD), MIN(Wp(II,n_prim), PHI_UL, PHI_LC))
PHI_max = MIN(MAX(Wp(II,n_prim), Wp(II-1,n_prim), PHI_MD), MAX(Wp(II,n_prim), PHI_UL, PHI_LC))
W_imh(n_prim) = W_imh(n_prim) + MINMOD2(PHI_min-W_imh(n_prim), PHI_max-W_imh(n_prim))
ENDIF
ENDDO
END SUBROUTINE
END