Hi,
I am trying to use pointers in a Fortran code using OpenACC. I made the following example, which seems to work with PGI 12.10. Is it safe to assume that passing the host address that way is correct, and that I can use the present directive inside the subroutine?
In the OpenACC 1.0 document there is the following statement:
In Fortran, array pointers may be specified, but pointer association is not preserved in the device memory.
but I am not sure what the exact implications are.
Also I haven’t seen anything about this on the “Proposed Additions For OpenACC 2.0” document.
When looking at the Minfo message I get :
“80, Possible copy in and copy out of pa in call to gpu_routine”
I checked with the cuda profiler and there is actually no copy at execution at this point. But I don’t know what would trigger it.
Here is the code:
! OpenACC test program
! this code tests the use of
! pointers
! Host-side data shared between program units: a work array and a
! pointer that can be associated with it.
module data_field
  implicit none

  ! Working precision: at least 15 significant digits. This is the
  ! standard-conforming spelling of the former real*8 (same kind on the
  ! usual IEEE platforms). Private so it cannot clash with names in
  ! units that use this module.
  integer, parameter, private :: dp = selected_real_kind(15, 307)

  ! Work array; it carries the target attribute so that pa may point at it.
  real(dp), target, allocatable :: a(:)

  ! Pointer meant to alias a. It starts out disassociated so that
  ! associated(pa) is well defined before the first pointer assignment
  ! (without the initializer its association status would be undefined).
  real(dp), pointer :: pa(:) => null()
end module data_field
! Compute kernels that operate on data the caller has already made
! available on the accelerator.
module computation
  implicit none

  ! Working precision: at least 15 significant digits. This is the
  ! standard-conforming spelling of the former real*8 (same kind on the
  ! usual IEEE platforms). Private so it is not exported to users of
  ! this module.
  integer, parameter, private :: dp = selected_real_kind(15, 307)

contains

  ! Add 2 to every element of a, 20 times over, so each element grows
  ! by 40 in total.
  !
  !   nvec  (in)     number of elements of a to update
  !   a     (inout)  array updated in place
  !
  ! The data region uses present(a): it does not transfer anything
  ! itself and expects the caller to have placed a on the device
  ! beforehand (e.g. through an enclosing data region).
  subroutine gpu_routine(nvec, a)
    integer, intent(in) :: nvec
    real(dp), intent(inout) :: a(nvec)

    integer, parameter :: n_sweeps = 20           ! parallel regions launched per call
    real(dp), parameter :: increment = 2.0_dp     ! amount added per sweep
    integer :: i, iter
    !DIR$ INLINENEVER gpu_routine

    !$acc data present(a)
    ! One parallel region per sweep; the sweep loop itself stays on the host.
    do iter = 1, n_sweeps
      !$acc parallel
      !$acc loop
      do i = 1, nvec
        a(i) = a(i) + increment
      end do !i
      !$acc end parallel
    end do
    !workaround ! acc end data
    !$acc end data
  end subroutine gpu_routine

end module computation
! Driver for the OpenACC pointer test.
!
! Usage: ./test n
!
! Allocates a(n) on the host, associates the pointer pa with it, creates
! a on the device, zeroes it there, and then times niter calls of
! gpu_routine that receive the array through the pointer pa. Finally the
! device data is copied back and the sum and the time per call are printed.
program main
  use data_field, only: a, pa
  use computation, only: gpu_routine
  implicit none

  ! Working precision (standard-conforming replacement for real*8).
  integer, parameter :: dp = selected_real_kind(15, 307)
  ! Number of timed calls to gpu_routine.
  integer, parameter :: niter = 10

  character(len=32) :: arg
  integer :: nvec, i, nt
  integer :: ios, alloc_stat
  integer :: icountnew, icountold, icountrate, icountmax
  integer :: ticks
  real(dp) :: rt

  ! --- command line -----------------------------------------------------
  if (command_argument_count() /= 1) stop 'usage ./test n'
  call get_command_argument(1, arg)
  ! Pre-set nvec so that an argument that terminates list-directed input
  ! without supplying a value is still rejected below.
  nvec = -1
  read(arg, *, iostat=ios) nvec
  if (ios /= 0 .or. nvec < 0) stop 'usage ./test n'

  ! --- host data --------------------------------------------------------
  allocate(a(nvec), stat=alloc_stat)
  if (alloc_stat /= 0) stop 'allocation of a failed'
  pa => a

  ! --- device work ------------------------------------------------------
  !$acc data create(a)

  ! Initialise on the device; create() does not copy host values in.
  !$acc parallel
  !$acc loop
  do i = 1, nvec
    a(i) = 0.0_dp
  end do !i
  !$acc end parallel

  call system_clock(count=icountold, count_rate=icountrate, count_max=icountmax)
  do nt = 1, niter
    ! The array is deliberately passed through the pointer pa rather than a:
    ! this is the case under test.
    call gpu_routine(nvec, pa)
  end do
  call system_clock(count=icountnew)

  ! The copy back to the host happens after the second clock reading,
  ! so it is not part of the measured time.
  !$acc update host(a)
  !$acc end data

  ! --- timing -----------------------------------------------------------
  ! Subtract the integer counts first and convert afterwards; converting
  ! each (possibly very large) count to default real before subtracting
  ! would throw away the low-order ticks.
  ticks = icountnew - icountold
  ! The clock restarts at 0 after reaching count_max.
  if (ticks < 0) ticks = (ticks + icountmax) + 1
  if (icountrate > 0) then
    rt = real(ticks, dp) / real(icountrate, dp)
  else
    ! No usable clock on this system.
    rt = 0.0_dp
  end if

  ! --- report -----------------------------------------------------------
  print *, 'n=', nvec, sum(a)
  write(*, '(a, f10.5, a)') ' time/step=', rt*1.0e3_dp/niter, ' ms'

  nullify(pa)
  deallocate(a)
end program main
Thanks,
Xavier