Here you go. Using the initial reflected example as the base, I modified X to be allocatable and then added the mirror directive. mirror creates an implicit data region with the same scope as the variable, hence I removed the original explicit data region. I also put X’s initialisation loop into a compute region so I didn’t need to copy the data (otherwise you would need an update directive to get the data over to the GPU).
Would I need to have an interface between main and intermediate, and an interface between intermediate and accumulateTrigo?
Yes, and I updated the example (reflect3.f90) to reflect this. However, it would be uncommon to do this. More likely you would move these routines into a module, where an explicit interface is generated for you automatically, or create a module that contains nothing but an interface block (reflect4.f90). The C equivalent would be a header file containing function prototypes.
Much of your challenge with GAMESS will be porting it to F90. But, at least in my opinion, F90 is a much better language than F77 and well worth the effort.
% cat reflect3.f90
subroutine accumulateTrigo(a, size, sum)
  ! Accumulate sin(x)**2 + cos(x)**2 over every element of `a`.
  !
  ! Arguments:
  !   a    - real array of `size` elements; named in a `reflected`
  !          directive, so callers need an explicit interface that
  !          carries the same directive.
  !   size - number of elements in `a`.
  !   sum  - on return, the accumulated total of the final pass.
  !
  ! The accumulation is repeated n_repeats times and `sum` is reset at
  ! the start of every pass, so only the last pass determines the
  ! result (the repetition presumably just provides work to time).
  implicit none
  integer, parameter :: n_repeats = 500
  integer :: size
  real :: a(size)
  real :: sum
  integer :: rep, idx
  !$acc reflected (a)

  do rep = 1, n_repeats
    sum = 0.0
    !$acc region
    do idx = 1, size
      sum = sum + sin(a(idx)) ** 2 + cos(a(idx)) ** 2
    end do
    !$acc end region
  end do
end subroutine accumulateTrigo
subroutine intermediate(a, size, sum)
  ! Pass-through layer between the main program and accumulateTrigo.
  !
  ! Prints the incoming size, forwards all three arguments unchanged to
  ! accumulateTrigo, then prints the resulting sum.
  !
  ! Arguments:
  !   a    - real array of `size` elements, named in a `reflected`
  !          directive here and in the callee's interface.
  !   size - number of elements in `a`.
  !   sum  - set by accumulateTrigo.
  implicit none
  integer :: size
  real :: a(size)
  real :: sum
  !$acc reflected (a)

  ! Explicit interface for the callee so the compiler sees its
  ! `reflected` directive at the call site.
  interface
    subroutine accumulateTrigo(a, size, sum)
      implicit none
      integer :: size
      real :: a(size)
      real :: sum
      !$acc reflected (a)
    end subroutine accumulateTrigo
  end interface

  print *, 'INTER size=', size
  call accumulateTrigo(a, size, sum)
  print *, 'INTER sum=', sum
end subroutine intermediate
program main
  ! Driver for the reflected/mirror example.
  !
  ! Allocates X (mirrored on the accelerator), fills it inside a compute
  ! region, then calls `intermediate` m times and reports the last sum
  ! and the elapsed wall-clock time of those calls.
#ifdef _ACCEL
  ! Provides acc_init and the acc_device_nvidia constant; without it
  ! acc_device_nvidia would be an implicitly typed, undefined variable.
  use accel_lib
#endif
  implicit none
  real, allocatable, dimension(:) :: X
  integer :: Xsize, m, i, k, c1, c2, clock_rate
  real :: lastSum
  !$acc mirror (X)

  ! Explicit interface so the `reflected` directive on the dummy array
  ! is visible at the call site.
  interface
    subroutine intermediate(a, size, sum)
      integer :: size
      real, dimension(size) :: a
      real :: sum
      !$acc reflected (a)
    end subroutine intermediate
  end interface

  Xsize = 100000
  allocate(X(Xsize))
  m = 5 ! m calls to subroutine accumulateTrigo

  ! GPU initialization
#ifdef _ACCEL
  call acc_init( acc_device_nvidia )
#endif

  ! initialization of array X, done in a compute region
  !$acc region do
  do i = 1, Xsize
    X(i) = (i*2.0)
  enddo

  ! computations on GPU
  ! Ask for the tick rate as well: it is processor dependent, so the
  ! tick count cannot be converted to time with a fixed divisor.
  call system_clock( count=c1, count_rate=clock_rate )
  do k = 1, m
    call intermediate(X, Xsize, lastSum)
  enddo
  call system_clock( count=c2 )

  ! Printed after the second clock reading so this I/O is not timed.
  print *, "LAST = ", lastSum
  if (clock_rate > 0) then
    print *, 1000.0*real(c2-c1)/real(clock_rate), ' milliseconds'
  else
    ! count_rate is zero when the processor has no clock.
    print *, 0.0, ' milliseconds'
  end if
end program
% cat reflect4.f90
module myinter
  ! Interface-only module: declares explicit interfaces for the two
  ! external subroutines (including their `reflected` directives) so
  ! that callers can pick them up with a single `use` statement instead
  ! of repeating interface blocks.
  implicit none

  interface
    subroutine intermediate(a, size, sum)
      implicit none
      integer :: size
      real :: a(size)
      real :: sum
      !$acc reflected (a)
    end subroutine intermediate

    subroutine accumulateTrigo(a, size, sum)
      implicit none
      integer :: size
      real :: a(size)
      real :: sum
      !$acc reflected (a)
    end subroutine accumulateTrigo
  end interface
end module myinter
subroutine accumulateTrigo(a, size, sum)
  ! Accumulate sin(x)**2 + cos(x)**2 over every element of `a`.
  !
  ! Arguments:
  !   a    - real array of `size` elements; named in a `reflected`
  !          directive, so callers need an explicit interface that
  !          carries the same directive.
  !   size - number of elements in `a`.
  !   sum  - on return, the accumulated total of the final pass.
  !
  ! The accumulation is repeated n_repeats times and `sum` is reset at
  ! the start of every pass, so only the last pass determines the
  ! result (the repetition presumably just provides work to time).
  implicit none
  integer, parameter :: n_repeats = 500
  integer :: size
  real :: a(size)
  real :: sum
  integer :: rep, idx
  !$acc reflected (a)

  do rep = 1, n_repeats
    sum = 0.0
    !$acc region
    do idx = 1, size
      sum = sum + sin(a(idx)) ** 2 + cos(a(idx)) ** 2
    end do
    !$acc end region
  end do
end subroutine accumulateTrigo
subroutine intermediate(a, size, sum)
  ! Pass-through layer between the main program and accumulateTrigo.
  !
  ! Prints the incoming size, forwards all three arguments unchanged to
  ! accumulateTrigo, then prints the resulting sum.
  !
  ! Arguments:
  !   a    - real array of `size` elements, named in a `reflected`
  !          directive here and in the callee's interface.
  !   size - number of elements in `a`.
  !   sum  - set by accumulateTrigo.

  ! Import only the callee's interface. myinter also declares an
  ! interface for `intermediate`; a subprogram must not make an
  ! interface body for itself accessible in its own scope, and stricter
  ! compilers reject an unrestricted `use myinter` here.
  use myinter, only: accumulateTrigo
  implicit none
  integer :: size
  real, dimension(size) :: a
  real :: sum
  !$acc reflected (a)

  print *, 'INTER size=', size
  call accumulateTrigo(a, size, sum)
  print *, 'INTER sum=', sum
end subroutine intermediate
program main
  ! Driver for the reflected/mirror example (interface-module variant).
  !
  ! Allocates X (mirrored on the accelerator), fills it inside a compute
  ! region, then calls `intermediate` m times and reports the last sum
  ! and the elapsed wall-clock time of those calls.

  ! Explicit interface (with its `reflected` directive) for the one
  ! routine this program calls.
  use myinter, only: intermediate
#ifdef _ACCEL
  ! Provides acc_init and the acc_device_nvidia constant; without it
  ! acc_device_nvidia would be an implicitly typed, undefined variable.
  use accel_lib
#endif
  implicit none
  real, allocatable, dimension(:) :: X
  integer :: Xsize, m, i, k, c1, c2, clock_rate
  real :: lastSum
  !$acc mirror (X)

  Xsize = 100000
  allocate(X(Xsize))
  m = 5 ! m calls to subroutine accumulateTrigo

  ! GPU initialization
#ifdef _ACCEL
  call acc_init( acc_device_nvidia )
#endif

  ! initialization of array X, done in a compute region
  !$acc region do
  do i = 1, Xsize
    X(i) = (i*2.0)
  enddo

  ! computations on GPU
  ! Ask for the tick rate as well: it is processor dependent, so the
  ! tick count cannot be converted to time with a fixed divisor.
  call system_clock( count=c1, count_rate=clock_rate )
  do k = 1, m
    call intermediate(X, Xsize, lastSum)
  enddo
  call system_clock( count=c2 )

  ! Printed after the second clock reading so this I/O is not timed.
  print *, "LAST = ", lastSum
  if (clock_rate > 0) then
    print *, 1000.0*real(c2-c1)/real(clock_rate), ' milliseconds'
  else
    ! count_rate is zero when the processor has no clock.
    print *, 0.0, ' milliseconds'
  end if
end program