Hi Mat,
Thanks for the reply.
Unfortunately, the error does not lie in
!$acc data copyin(self, self%riemann%solve)
I have previously gotten it working with the parallel loop inside the ‘solve’ (or hll), but this does not perform well, and the Idea here is the only have the function calls on the device.
Below I have provided a complete mockup that should maintain the core aspects and highlight the issue. In the ‘update’ function many other function needs to be called. These are functions in the riemann_mod and does not cause errors. In the solve the is to be an option to choose between different methods. Here only there ‘hll’ method is included and I have removed the ‘select’.
With the code below, ‘solve’ is still causing an error.
The code is compiled with:
pgfortran -ta=tesla:cc75 -Minfo=accel -acc -fast forum.f90 -o forum.exe
Where forum.f90 is the name of my local file
Thanks in advance.
MODULE riemann_mod
implicit none
private
type, public:: riemann_t
real(8) :: g1,g2
character(len=4) :: method='hll'
contains
procedure :: solve
end type
type(riemann_t),public :: riemann
contains
subroutine solve(self, dir, l, u, left, rght, flux)
!$acc routine gang
class(riemann_t) :: self
integer:: dir, l(3), u(3)
real, dimension(:,:,:,:) :: left
real, dimension(:,:,:,:) :: rght
real, dimension(:,:,:,:) :: flux
!print *,"method: ", self%method
call hll(self, dir, l, u, left, rght, flux, self%g1, self%g2)
end subroutine solve
subroutine hll(self, dir, l, u, left, rght, flux, g1, g2)
!$acc routine gang
class(riemann_t):: self
integer:: dir, l(3), u(3)
real, dimension(:,:,:,:) :: left
real, dimension(:,:,:,:) :: rght
real, dimension(:,:,:,:) :: flux
real(8):: g1, g2
real:: &
Dl, Ul, Pl, El, Vl, Wl, Tl, cl, fDl, fTl, fUl, fVl, fWl, Sl, &
Dr, Ur, Pr, Er, Vr, Wr, Tr, cr, fDr, fTr, fUr, fVr, fWr, Sr, &
cmax
integer:: i1, i2, i3, j1, j2, j3, v1, v2, v3
integer, save:: itimer=0
!-----------------------------------------------------------------------------
! Parallel and perpendicular velocity components
!-----------------------------------------------------------------------------
v1 = 2 + mod(dir-1,3)
v2 = 2 + mod(dir ,3)
v3 = 2 + mod(dir+1,3)
!-----------------------------------------------------------------------------
! For the left states, we need to refer to index-1 in the flux direction
!-----------------------------------------------------------------------------
if (dir==1) then
j1=1; j2=0; j3=0
else if (dir==2) then
j1=0; j2=1; j3=0
else
j1=0; j2=0; j3=1
end if
!$acc loop gang collapse(3)
do i3=l(3),u(3)
do i2=l(2),u(2)
do i1=l(1),u(1)
Dl = exp(left(i1-j1,i2-j2,i3-j3,1))
El = exp(left(i1-j1,i2-j2,i3-j3,5))
Ul = left(i1-j1,i2-j2,i3-j3,v1)
Vl = left(i1-j1,i2-j2,i3-j3,v2)
Wl = left(i1-j1,i2-j2,i3-j3,v3)
Dr = exp(rght(i1,i2,i3,1))
Er = exp(rght(i1,i2,i3,5))
Ur = rght(i1,i2,i3,v1)
Vr = rght(i1,i2,i3,v2)
Wr = rght(i1,i2,i3,v3)
!---------------------------------------------------------------------------
! Pressure and sound speed for each side of interface (l==left, r==right)
!---------------------------------------------------------------------------
!- isn't the implementation of g1 and g2 missing???
Pl = Dl*El*g1
Pr = Dr*Er*g1
cl = sqrt(g2*El)
cr = sqrt(g2*Er)
cmax = max(cl,cr)
!if (i1 == 10 .and. i2 == 10 .and. i3 == 10) then
! print *, Dl
! print *, El
! print *, left(i1-j1,i2-j2,i3-j3,1)
! print *, left(i1-j1,i2-j2,i3-j3,2)
! print *, left(i1-j1,i2-j2,i3-j3,3)
! print *, left(i1-j1,i2-j2,i3-j3,4)
! print *, left(i1-j1,i2-j2,i3-j3,5)
! print *, g1
! print *, Pl
!end if
!---------------------------------------------------------------------------
! Total energy per unit volume
!---------------------------------------------------------------------------
Tl = Dl*(El + 0.5*(Ul**2 + Vl**2 + Wl**2))
Tr = Dr*(Er + 0.5*(Ur**2 + Vr**2 + Wr**2))
!---------------------------------------------------------------------------
! Hydro fluxes
!---------------------------------------------------------------------------
fDl = Dl*Ul
fDr = Dr*Ur
fUl = Dl*Ul*Ul + Pl
fUr = Dr*Ur*Ur + Pr
fTl = Ul*(Tl + Pl)
fTr = Ur*(Tr + Pr)
fVl = Dl*Ul*Vl
fVr = Dr*Ur*Vr
fWl = Dl*Ul*Wl
fWr = Dr*Ur*Wr
!---------------------------------------------------------------------------
! maximum wave speeds to the l and r (guaranteed to have correct sign)
!---------------------------------------------------------------------------
SL = min(min(Ul,Ur)-cmax,0.0)
SR = max(max(Ul,Ur)+cmax,0.0)
!---------------------------------------------------------------------------
! HLL flux based on wavespeeds. The general form is
! (SR*F_l - SL*F_r + SL*SR *(u_r - u_l))/(SR-SL)
! where u = (rho, rho*v, E_tot) are the conserved variables
!---------------------------------------------------------------------------
flux(i1,i2,i3, 1) = (SR * fDl - SL*fDr + SL*SR*(Dr - Dl )) / (SR - SL)
flux(i1,i2,i3,v1) = (SR * fUl - SL*fUr + SL*SR*(Dr*Ur - Dl*Ul)) / (SR - SL)
flux(i1,i2,i3,v2) = (SR * fVl - SL*fVr + SL*SR*(Dr*Vr - Dl*Vl)) / (SR - SL)
flux(i1,i2,i3,v3) = (SR * fWl - SL*fWr + SL*SR*(Dr*Wr - Dl*Wl)) / (SR - SL)
flux(i1,i2,i3, 5) = (SR * fTl - SL*fTr + SL*SR*(Tr - Tl )) / (SR - SL)
end do
end do
end do
end subroutine hll
end MODULE riemann_mod
MODULE solver_mod
USE riemann_mod
USE omp_lib
use openacc
implicit none
private
integer, parameter:: nt=5
type, public:: solver_t
integer:: id
integer:: nw=1, nv=5, nt=nt, ng=2, n=16
integer:: it=1, new=2
integer, dimension(3):: gn, lb, lo, li, ui, uo, ub
real(8):: time=0d0, dtime=1d0
real(8):: t(nt), dt(nt), ds(3)=1d0
real:: slope_type
real:: courant
real:: csound
real(8):: g1, g2, gamma
character(len=16):: kind = 'ramses_hd_patch'
type(riemann_t):: riemann
real, dimension(:,:,:,:,:,:), allocatable:: mem
real, dimension(:,:,:,:) , allocatable :: prim
real, dimension(:,:,:,:) , allocatable :: left
real, dimension(:,:,:,:) , allocatable :: rght
real, dimension(:,:,:,:,:), allocatable :: grad
real, dimension(:,:,:,:,:), allocatable :: flux
real, dimension(:,:), allocatable :: ff
contains
procedure:: init
procedure:: update
end type
real(8):: start
!-----------------------------------------------------------------------------
! Input parameters:
!-----------------------------------------------------------------------------
integer:: n=16 ! internal cells
integer:: ng=2 ! guard cells
integer:: slope_type=2 ! slope limiter
integer:: verbose=0 ! info level
logical:: detailed_timer=.true. ! optional timer
real(8):: gamma=1.4 ! gas gamma
real:: csound=1.0 ! sound speed
real:: courant=0.2 ! courant number
character(len=4):: method='hll' ! riemann method
!$acc declare create(slope_type)
contains
SUBROUTINE init (self)
class(solver_t)::self
!.............................................................................
namelist /solver_params/ verbose, courant, gamma, csound, slope_type, &
method, detailed_timer
integer:: iostat, i
integer:: m, i1, i2, i3
real(8):: r1, r2, r3
real(8), parameter:: pi2=8d0*atan(1d0)
integer, save:: id=0
logical, save:: first_time=.true.
!-----------------------------------------------------------------------------
!$omp atomic update
id = id + 1
self%id = id
!-----------------------------------------------------------------------------
! Here we replace the call patch_t%init directly, to initialize %mem
!-----------------------------------------------------------------------------
self%gn = self%n + 2*ng ! all cells
self%lb = 1 ! lower buondary
self%li = 1 + ng ! lower internal index
self%lo = self%li - 1 ! lower outside index
self%ui = self%lo + n ! upper internal index
self%uo = self%ui + 1 ! upper outside index
self%ub = self%ui + ng ! uppper boundary
m = self%gn(1)
allocate (self%mem(m,m,m,self%nv,self%nt,self%nw))
self%mem(:,:,:,1,:,:) = 1.0
self%mem(:,:,:,5,:,:) = 1.0
do i3=1,self%gn(3)
r3 = (i3-1)/self%gn(3)
do i2=1,self%gn(2)
r2 = (i2-1)/self%gn(2)
do i1=1,self%gn(1)
r1 = (i1-1)/self%gn(1)
self%mem(i1,i2,i3,2,:,:) = sin(r1*pi2)
self%mem(i1,i2,i3,3,:,:) = sin(r2*pi2)
self%mem(i1,i2,i3,4,:,:) = sin(r3*pi2)
end do
end do
end do
if (first_time) then
print *, 'solver_t%init:'
print *, 'id', self%id
print *, 'gn', self%gn
print *, 'lb', self%lb
print *, 'lo', self%lo
print *, 'li', self%li
print *, 'ub', self%ub
print *, 'uo', self%uo
print *, 'ui', self%ui
print *, 'mem', shape(self%mem)
print *, 'muscl2'
start = omp_get_wtime()
first_time = .false.
end if
!call self%patch_t%init
!-----------------------------------------------------------------------------
! Ramses-like specific things
!-----------------------------------------------------------------------------
self%riemann%method = method
self%courant = courant
self%csound = csound
!-----------------------------------------------------------------------------
! Make sure to save and use consistent gamma factors throughout
!-----------------------------------------------------------------------------
self%gamma = gamma
if (gamma == 1d0) then
self%g1 = 1d0
else
self%g1 = gamma-1d0
end if
self%g2 = self%g1*gamma
self%riemann%g1 = self%g1
self%riemann%g2 = self%g2
call alloc(self)
END SUBROUTINE init
subroutine alloc(self)
class(solver_t) :: self
allocate(self%prim (self%gn(1), self%gn(2), self%gn(3),self%nv))
allocate(self%left (self%gn(1), self%gn(2), self%gn(3),self%nv))
allocate(self%rght (self%gn(1), self%gn(2), self%gn(3),self%nv))
allocate(self%grad (self%gn(1), self%gn(2), self%gn(3),self%nv, 3))
allocate(self%flux (self%gn(1), self%gn(2), self%gn(3),self%nv, 3))
allocate(self%ff (self%gn(1),3))
!For some reason the below lines have to be there even tho I CREATE the arrays on GPU
self%prim = 0.0
self%left = 0.0
self%rght = 0.0
self%grad = 0.0
self%flux = 0.0
self%ff = 0.0
end subroutine alloc
subroutine update(self)
class(solver_t),target :: self
print *, "test 0: ", self%mem(5,5,5,1,2,1)
!$acc data copyin(self) copy(self%mem) create(self%prim, &
!$acc self%left, self%rght, self%grad, self%flux)
!$acc parallel
call self%riemann%solve(1,self%lo,self%uo,self%left,self%rght,self%flux(:,:,:,:,1))
!------------
! VARIOUS OTHER DEVICE FUNCTION CALLS
!-------------
!$acc end parallel
!$acc end data
self%time = self%time + self%dtime
self%it = mod(self%it,5) + 1
self%new = mod(self%new,5) +1
end subroutine update
end MODULE solver_mod
MODULE task_list_mod
use solver_mod
implicit none
PRIVATE
type, public:: link_t
class(solver_t), pointer:: task=>null()
class(link_t), pointer:: next=>null(),prev=>null()
end type
type, public:: task_list_t
integer:: n=0
class(link_t),pointer:: head=>null(),tail=>null()
contains
procedure :: append
procedure :: pop
end type
contains
subroutine append(self,link)
class(task_list_t) :: self
class(link_t), pointer :: link
!print *, "currently in list: ", self%n
if (associated(self%tail)) then
link%prev => self%tail
self%tail%next => link
self%tail => link
else
self%tail => link
self%head => link
link%next => null()
link%prev => null()
end if
self%n = self%n +1
!print *, "now in list: ", self%n
end subroutine append
function pop(self) result(link)
class(task_list_t) :: self
class(link_t),pointer :: link
!print *, "starting to pop"
link => self%head
self%head => self%head%next
if (associated(self%head)) then
NULLIFY(self%head%prev)
else
NULLIFY(self%head)
NULLIFY(self%tail)
end if
self%n = self%n -1
!print *, "Popped. remaining: ", self%n
end function pop
end MODULE task_list_mod
PROGRAM mockup
use solver_mod
use task_list_mod
class(solver_t), pointer:: task
class(link_t), pointer :: link
type(task_list_t):: task_list
real :: start_time, stop_time
integer:: i
integer:: n=16
integer ::ntask=1
real(8):: end_time=1d0
do i = 1,ntask
allocate(task,link)
link%task => task
call task%init
call task_list%append (link)
end do
call cpu_time(start_time)
do while(task_list%n > 0)
link => task_list%pop()
call link%task%update
if (link%task%time < end_time) then
call task_list%append(link)
end if
end do
call cpu_time(stop_time)
!print *, "Total time: ", stop_time-start_time
End program mockup