Hi,
I am trying to make use of the device_resident directive in the following test code:
! test programme OpenACC
module data_field
implicit none
real*8, allocatable :: a(:),b(:)
! acc mirror(a,b) !PGI syntax, not used
END module data_field
module work_array
implicit none
real*8, allocatable :: a1(:), a2(:), a3(:), a4(:), a5(:), a6(:)
real*8, allocatable :: zparam(:)
! acc mirror(a1,a2,a3,a4,a5,a6,zparam) !PGI syntax, not used
!$acc declare device_resident(a1,a2,a3,a4,a5,a6,zparam)
END module work_array
module computation
implicit none
contains
subroutine gpu_routine(nvec,a,b)
USE work_array, only: a1,a2,a3,a4,a5,a6,zparam
integer, intent(in) :: nvec
real*8, intent(inout) :: a(nvec)
real*8, intent(in) :: b(nvec)
integer :: i, iparam
!$acc data present(a,b) &
!$acc& present(a1,a2,a3,a4,a5,a6) &
!$acc& present(zparam)
!$acc kernels
DO iparam=1,8
zparam(iparam)=0.1D0*iparam
END DO
!$acc end kernels
!$acc kernels
Do i=1,nvec
a1(i)=0.1D0*(1.0D0+1.0D0/i)
a2(i)=0.2D0*(1.0D0+1.0D0/i)
a3(i)=0.3D0*(1.0D0+1.0D0/i)
a4(i)=0.4D0*(1.0D0+1.0D0/i)
a5(i)=0.5D0*(1.0D0+1.0D0/i)
a6(i)=0.6D0*(1.0D0+1.0D0/i)
END do
!$acc end kernels
!$acc kernels loop
do i=1,nvec
do iparam=1,8 ! just to imitate several operations
a(i)=zparam(iparam)*(1+cos(a(i)))+b(i)*(1.0D0+sin(1.0D0+a1(i)+a2(i)+a3(i)+a4(i)+a5(i)+a6(i)))
end do
end do !i
!$acc end kernels
!$acc end data
end subroutine gpu_routine
end module computation
program main
USE data_field, only: a,b
USE work_array, only: a1,a2,a3,a4,a5,a6,zparam
USE computation, only: gpu_routine
implicit none
integer :: n1,n2
integer :: nargs,i,j,k,nt, niter
character*10 arg
integer :: nvec,nblock
real*8 :: rt
INTEGER :: icountnew, icountold, icountrate, icountmax
INTEGER :: z_sync(2) !use for synchronization
nargs = command_argument_count()
niter=10
if( nargs == 2 ) then
call getarg( 1, arg )
read(arg,'(i)') n1
call getarg( 2, arg )
read(arg,'(i)') n2
else
stop('usage ./test n1 n2')
endif
nvec=n1*n2
allocate(a(nvec),b(nvec))
allocate(a1(nvec), a2(nvec), a3(nvec),a4(nvec),a5(nvec),a6(nvec))
allocate(zparam(8))
z_sync(:)=1
!$acc data create(a,b,z_sync)
!$acc kernels
do i=1,nvec
a(i)=0.0D0
b(i)=0.1D0
end do
!$acc end kernels
!$acc update device(z_sync)
CALL SYSTEM_CLOCK(COUNT=icountold,COUNT_RATE=icountrate,COUNT_MAX=icountmax)
do nt=1,niter
call gpu_routine(nvec,a,b)
end do
!$acc update device(z_sync)
CALL SYSTEM_CLOCK(COUNT=icountnew)
!$acc update host(a)
rt = ( REAL(icountnew) - REAL(icountold) ) / REAL(icountrate)
print*, 'n1 =', n1, 'n2=', n2, sum(a), sum(z_sync)
write(*,20) rt*1.0e3/niter
20 format( ' time/step=', f10.5, ' ms' )
DEALLOCATE(a,b,a1,a2,a3,a4,a5,a6,zparam)
!$acc end data
end program main
I can compile this code, however, when I try to run it I am getting the following error:
>./test_present_devres 100 100
FATAL ERROR: data in PRESENT clause was not found: name=zparam
file:/users/lapixa/GPU/test_openacc/test_present/test_present_devres.f90 gpu_routine line:27
Am I using the directive incorrectly ?
Also one question concerning this directive. Here I am using it for arrays which are only access on the GPU side. Would it be possible to use it for array “a” which is also needed on the CPU side (like the mirror directive with the PGIacc)
Thanks for your help,
Xavier