Hi,
I have tried the new 12.9 version of the compiler, for an OpenACC code. I am seeing a new error message (I don’t remember seing this in previous version):
PGF90-W-0155-Compiler failed to translate accelerator region (see -Minfo messages): Load of NULL symbol (test_diag_level.f90: 71)
-
I have made a reduce test code were I can reproduce the error message (see below)
-
Is there an option to force the compiler to crash when it can not translate an accelerator region which has an explicit “parallel” or “loop” construct. Indeed in my case the kernel are embeded in a large data region, and there is no point to compute this one loop on the CPU as the data is not valid there at this point.
! test programme OpenACC
module data_field
implicit none
real, allocatable :: zdia_2d(:), hk_2d(:), hk1_2d(:), hhl(:,:)
integer, allocatable :: k_2d(:)
END module data_field
module data_allocation
CONTAINS
subroutine my_allocation(nvec,nlevel)
USE data_field, only: zdia_2d, hk_2d, hk1_2d, hhl,k_2d
integer :: nvec, nlevel
allocate(zdia_2d(nvec), hk_2d(nvec), hk1_2d(nvec), hhl(nvec,nlevel),k_2d(nvec))
end subroutine my_allocation
subroutine my_deallocation
USE data_field, only: zdia_2d, hk_2d, hk1_2d, hhl,k_2d
deallocate(zdia_2d, hk_2d, hk1_2d, hhl,k_2d)
end subroutine my_deallocation
end module data_allocation
module computation
implicit none
contains
!==============================================================================
SUBROUTINE diag_level_gpu (ipstart, ipend, ke1, zdia_2d, k_2d, &
hk_2d, hk1_2d, hhl)
!GPU version : it is faster to go through the whole data then to preform a
! reduction lcheck logical
!------------------------------------------------------------------------------
INTEGER, INTENT(IN) :: &
ipstart,ipend, & ! start end end indices of horizontal domain
ke1 ! vertical dimension of hhl
REAL, INTENT(IN) :: &
zdia_2d(:) !diagnostic height
INTEGER, INTENT(INOUT) :: &
k_2d(:) !index field of the upper level index
!to be used of near surface diagnostics
REAL, INTENT(INOUT) :: &
hk_2d(:), & ! mid level height above ground belonging to 'k_2d'
hk1_2d(:) ! mid level height above ground of the previous layer (below)
REAL , INTENT(IN) :: &
hhl(:,:) ! height of half levels
INTEGER :: ip,k
REAL :: zhk_2d, zhk1_2d
INTEGER :: zk_2d
!------------------------------------------------------------------------------
!$acc data present(zdia_2d, k_2d, hk_2d, hk1_2d, hhl )
!$acc parallel
!$acc loop gang vector
DO ip=ipstart,ipend
DO k=k_2d(ip)-1,1,-1
!!$
!!$ IF ( hk_2d(ip)<zdia_2d(ip) ) THEN !diagnostic level is above current layer
!!$ k_2d(ip)=k
!!$ hk1_2d(ip)=hk_2d(ip)
!!$ hk_2d(ip)=(hhl(ip,k)+hhl(ip,k+1))*0.5-hhl(ip,ke1)
!!$ END IF
!XL: alternative implementation due to issue with PGI 11.9
zk_2d = k_2d(ip)
zhk_2d = hk_2d(ip)
zhk1_2d = hk1_2d(ip)
IF ( zhk_2d < zdia_2d(ip) ) THEN !diagnostic level is above current layer
zk_2d=k
zhk1_2d=zhk_2d
zhk_2d=(hhl(ip,k)+hhl(ip,k+1))*0.5-hhl(ip,ke1)
END IF
hk_2d(ip)=zhk_2d
hk1_2d(ip)=zhk1_2d
k_2d(ip) = zk_2d
END DO
END DO
!$acc end parallel
!$acc end data
END SUBROUTINE diag_level_gpu
end module computation
program main
USE data_field, only: zdia_2d, hk_2d, hk1_2d, hhl,k_2d
USE data_allocation, only: my_allocation, my_deallocation
USE computation, only: diag_level_gpu
implicit none
integer :: nvec,nlevel, nargs,i,j,k,nt
character*10 arg
integer :: nblock
real :: rt
INTEGER :: icountnew, icountold, icountrate, icountmax
nargs = command_argument_count()
nlevel=8
if( nargs == 1 ) then
call getarg( 1, arg )
read(arg,'(i)') nvec
else
stop('usage ./test n')
endif
CALL my_allocation(nvec,nlevel)
!$acc data create(zdia_2d, hk_2d, hk1_2d, hhl,k_2d)
!init
!$acc parallel
!$acc loop
do i=1,nvec
zdia_2d(i) = 10.0*(1+1/i)
hk_2d(i) = 10.5
hk1_2d(i)= 10.0
k_2d=2
end do
!$acc end parallel
!$acc parallel
do k=1,nlevel
!$acc loop
do i=1,nvec
hhl=5.0*(k-1)
end do
end do
!$acc end parallel
CALL SYSTEM_CLOCK(COUNT=icountold,COUNT_RATE=icountrate,COUNT_MAX=icountmax)
call diag_level_gpu (1, nvec, nlevel, zdia_2d, k_2d, &
hk_2d, hk1_2d, hhl)
CALL SYSTEM_CLOCK(COUNT=icountnew)
!$acc update host(k_2d,hk_2d, hk1_2d)
!$acc end data
rt = ( REAL(icountnew) - REAL(icountold) ) / REAL(icountrate)
print*, 'n=', nvec, sum(hk_2d)/nvec, sum(hk1_2d)/nvec, sum(k_2d)/nvec
write(*,20) rt*1.0e3
20 format( ' time/step=', f10.5, ' ms' )
CALL my_deallocation
end program main
pgf90 -Minfo -ta=nvidia -o test_diag_level test_diag_level.f90
PGF90-W-0155-Compiler failed to translate accelerator region (see -Minfo messages): Load of NULL symbol (test_diag_level.f90: 71)
diag_level_gpu:
69, Generating present(hhl(:,:))
Regards,
Xavier