error when using device_resident and present directives

Hi,

I am trying to make use of the device_resident directive in the following test code:

! test programme OpenACC 
module data_field 
  implicit none 
  real*8, allocatable :: a(:),b(:) 
  ! acc mirror(a,b) !PGI syntax, not used
END module data_field 

module work_array 
  implicit none 
  real*8, allocatable :: a1(:), a2(:),  a3(:), a4(:), a5(:),  a6(:) 
  real*8, allocatable :: zparam(:)
  ! acc mirror(a1,a2,a3,a4,a5,a6,zparam) !PGI syntax, not used
  !$acc declare device_resident(a1,a2,a3,a4,a5,a6,zparam)
END module work_array 

module computation 
  implicit none  
contains 

subroutine gpu_routine(nvec,a,b) 
  USE work_array, only: a1,a2,a3,a4,a5,a6,zparam 
  integer, intent(in) :: nvec 
  real*8, intent(inout) :: a(nvec) 
  real*8, intent(in) :: b(nvec) 
  integer :: i, iparam 

  !$acc data present(a,b) & 
  !$acc& present(a1,a2,a3,a4,a5,a6) & 
  !$acc& present(zparam) 
  
  !$acc kernels 
  DO iparam=1,8 
     zparam(iparam)=0.1D0*iparam 
  END DO 
  !$acc end kernels 
  

  !$acc kernels 
  Do i=1,nvec        
     a1(i)=0.1D0*(1.0D0+1.0D0/i) 
     a2(i)=0.2D0*(1.0D0+1.0D0/i) 
     a3(i)=0.3D0*(1.0D0+1.0D0/i) 
     a4(i)=0.4D0*(1.0D0+1.0D0/i) 
     a5(i)=0.5D0*(1.0D0+1.0D0/i) 
     a6(i)=0.6D0*(1.0D0+1.0D0/i) 
  END do 
  !$acc end kernels 

  !$acc kernels loop 
  do i=1,nvec        
     do iparam=1,8 ! just to imitate several operations 
        a(i)=zparam(iparam)*(1+cos(a(i)))+b(i)*(1.0D0+sin(1.0D0+a1(i)+a2(i)+a3(i)+a4(i)+a5(i)+a6(i)))
      end do 
  end do !i 
  !$acc end kernels 

  !$acc end data 
  
end subroutine gpu_routine 

end module computation 
  
program main 
  USE data_field, only: a,b 
  USE work_array, only: a1,a2,a3,a4,a5,a6,zparam 
  USE computation, only: gpu_routine 
  implicit none 
  integer :: n1,n2 
  integer :: nargs,i,j,k,nt, niter 
  character*10 arg 
  integer :: nvec,nblock 
  real*8 :: rt 
  INTEGER ::  icountnew, icountold, icountrate, icountmax 
  INTEGER :: z_sync(2) !use for synchronization 

  nargs = command_argument_count() 
  niter=10 
  if( nargs == 2 ) then 
     call getarg( 1, arg ) 
     read(arg,'(i)') n1 
     call getarg( 2, arg ) 
     read(arg,'(i)') n2 
  else 
     stop('usage ./test n1 n2') 
  endif 
  
 nvec=n1*n2 

 
  
 allocate(a(nvec),b(nvec)) 
 allocate(a1(nvec), a2(nvec), a3(nvec),a4(nvec),a5(nvec),a6(nvec)) 
 allocate(zparam(8)) 
 z_sync(:)=1 

!$acc data create(a,b,z_sync)  

 !$acc kernels 
 do i=1,nvec 
    a(i)=0.0D0 
    b(i)=0.1D0 
 end do 
 !$acc end kernels 
  
 !$acc update device(z_sync) 
 CALL SYSTEM_CLOCK(COUNT=icountold,COUNT_RATE=icountrate,COUNT_MAX=icountmax) 
  
 do nt=1,niter 
       call gpu_routine(nvec,a,b) 
 end do 

 !$acc update device(z_sync) 
  CALL SYSTEM_CLOCK(COUNT=icountnew) 
  !$acc update host(a) 


 rt = ( REAL(icountnew) - REAL(icountold) ) / REAL(icountrate) 
 print*, 'n1 =', n1, 'n2=', n2, sum(a), sum(z_sync) 
  write(*,20) rt*1.0e3/niter 
20 format( ' time/step=', f10.5, ' ms' ) 

 DEALLOCATE(a,b,a1,a2,a3,a4,a5,a6,zparam) 
  !$acc end data 

end program main

I can compile this code, however, when I try to run it I am getting the following error:

>./test_present_devres 100 100
FATAL ERROR: data in PRESENT clause was not found: name=zparam
 file:/users/lapixa/GPU/test_openacc/test_present/test_present_devres.f90 gpu_routine line:27

Am I using the directive incorrectly ?

Also one question concerning this directive. Here I am using it for arrays which are only access on the GPU side. Would it be possible to use it for array “a” which is also needed on the CPU side (like the mirror directive with the PGIacc)

Thanks for your help,

Xavier

Hi Xavier,

We’re still missing a few OpenACC features and device resistant is one of them. I just ask Michael for an eta, and he expects it to be in by the 12.10 release.

  • Mat

I experience the same problem. Waiting for it to be fixed.

I’ve also run into this problem. Will using the ‘create’ clause work for this case? They seem to have almost the same functionality.

From section 2.11.1 of the OpenAcc spec:
The device_resident specifies that the memory for the named variables should be allocated in the accelerator device memory, not in the host memory.

From section 2.7.5 of the spec:
2.7.5 create clause
The create clause is used to declare that the variables, arrays or subarrays in the list need to be allocated (created) in the device memory, but the values in the host memory are not needed on the accelerator, and any values computed and assigned on the accelerator are not needed on the host. No data in this clause will be copied between the host and device memories.

-Karen

Hi Karen,

“create” is fine to use. The man difference is the scope and lifetime. Create is limited to the scope and lifetime of the data region. While “device_resident” has a same scope and lifetime as the variable.

Hope this helps,
Mat

Hi Mat,
Thanks for the clarification.
Karen