Hi again. I may have found another one. The following code errors out at runtime on 15.10 but works on 15.7. Line 100 in the error message below is the "!$acc kernels" line in inline_kernels (I left out some comments from the listing).
./example_gpu
hostptr=0x12adaa0,stride=1,size=655360,eltsize=4,name=a,flags=0x200=present,async=-1,threadid=1
FATAL ERROR: data in PRESENT clause was not found on device 1: name=a host:0x12adaa0
file:/home/michel/hybrid/examples/openACC_hybrid_hostonly/build/gpu/source/example.f90 inline_kernels line:100
pgf90 -acc -Minfo=accel,inline,ipa -Mneginfo -Mcuda=cc2x,cc3x -ta=nvidia,cc2x,cc3x -fast -DGPU -c example.f90 -o example.o
module example
contains
! Entry point that contains no OpenACC data directive of its own:
! it forwards its four arrays unchanged to inline_kernels.
!   a, b : input arrays, passed through
!   c, d : output arrays, assigned by inline_kernels
subroutine host_only_subroutine(a, b, c, d)
  use openacc
  use cudafor
  implicit none
  real, intent(in)  :: a(256, 256, 10), b(256, 256, 10)
  real, intent(out) :: c(256, 256, 10), d(256, 256, 10)

  call inline_kernels(a, b, c, d)
end subroutine host_only_subroutine
! Wrapper that places the four arrays in an unstructured OpenACC data region
! around the call to inline_kernels.
!   a, b : input arrays; copied in on entry, deleted on exit
!   c, d : output arrays; created on entry, copied out on exit
subroutine wrapper_inline_kernels(a, b, c, d)
  use openacc
  use cudafor
  implicit none
  real, intent(in)  :: a(256, 256, 10), b(256, 256, 10)
  real, intent(out) :: c(256, 256, 10), d(256, 256, 10)
  ! ****** additional symbols inserted by framework to emulate device support of language features
  ! acc_is_present returns a logical, so the flag is declared logical rather
  ! than integer (logical-to-integer assignment is a non-standard extension).
  logical :: hf_symbols_are_device_present

  ! c and d are intent(out): their contents are undefined on entry and
  ! inline_kernels assigns every element, so they are only created here
  ! (no host-to-device transfer) and copied back on exit.
  !$acc enter data copyin(a, b) create(c, d)
  ! The flag is assigned but not read again inside this routine.
  hf_symbols_are_device_present = acc_is_present(a)
  ! ****** end additional symbols

  call inline_kernels(a, b, c, d)

  !$acc exit data delete(a, b) copyout(c, d)
end subroutine wrapper_inline_kernels
! Element-wise sum and product of two arrays, each in its own OpenACC
! kernels region:
!   c(x, y, z) = a(x, y, z) + b(x, y, z)
!   d(x, y, z) = a(x, y, z) * b(x, y, z)
!   a, b : input arrays
!   c, d : output arrays; every element is assigned
! Both kernels regions carry an if clause driven by acc_is_present(a) and a
! present clause for all four arrays.
! NOTE(review): the if clause is presumably meant to let callers that never
! placed the arrays in a data region run the loops without device data --
! confirm against the OpenACC specification / compiler release in use.
subroutine inline_kernels(a, b, c, d)
  use openacc
  use cudafor
  implicit none
  real, intent(in)  :: a(256, 256, 10), b(256, 256, 10)
  real, intent(out) :: c(256, 256, 10), d(256, 256, 10)
  ! z was previously undeclared and therefore implicitly typed real, although
  ! it is used as a do variable and as an array subscript.
  integer(4) :: x, y, z
  ! acc_is_present returns a logical and the if clause expects a condition,
  ! so the flag is declared logical rather than integer.
  logical :: hf_symbols_are_device_present

  hf_symbols_are_device_present = acc_is_present(a)

  ! First region: c = a + b
  !$acc kernels if(hf_symbols_are_device_present) present(a, b, c, d)
  !$acc loop independent vector(16)
  do y = 1, 256
    !$acc loop independent vector(16)
    do x = 1, 256
      !$acc loop seq
      do z = 1, 10
        c(x, y, z) = a(x, y, z) + b(x, y, z)
      end do
    end do
  end do
  !$acc end kernels

  ! Second region: d = a * b
  !$acc kernels if(hf_symbols_are_device_present) present(a, b, c, d)
  !$acc loop independent vector(16)
  do y = 1, 256
    !$acc loop independent vector(16)
    do x = 1, 256
      !$acc loop seq
      do z = 1, 10
        d(x, y, z) = a(x, y, z) * b(x, y, z)
      end do
    end do
  end do
  !$acc end kernels
end subroutine inline_kernels
end module example
! Test driver for module example.
! Step 1: host_only_subroutine computes c = a + b and d = a * b.
! Step 2: wrapper_inline_kernels computes e = c + d and f = c * d.
! With a = 1 and b = 2 this gives c = 3, d = 2, and therefore e = 5, f = 6.
! Every element of e and f is checked; the first mismatch is reported and
! the program stops with code 2.
program main
  use, intrinsic :: iso_fortran_env, only: output_unit
  use example, only: host_only_subroutine, wrapper_inline_kernels
  implicit none
  real, dimension(256, 256, 10) :: a, b, c, d, e, f
  integer :: x, y, z
  integer :: fail_x, fail_y, fail_z
  logical :: test

  ! Default-real literals: the arrays are default real, so double-precision
  ! literals add nothing.
  a(:,:,:) = 1.0
  b(:,:,:) = 2.0
  c(:,:,:) = 0.0
  d(:,:,:) = 0.0
  e(:,:,:) = 0.0
  f(:,:,:) = 0.0
  test = .true.

  call host_only_subroutine(a, b, c, d)
  call wrapper_inline_kernels(c, d, e, f)
  write(output_unit, *) "calculation complete"

  ! The expected values 5 and 6 are exactly representable, so exact
  ! comparison is intentional here. Once test is false no further index is
  ! recorded, so fail_x/fail_y/fail_z hold the first mismatch in loop order.
  do y = 1, 256
    do x = 1, 256
      do z = 1, 10
        if (test .and. e(x, y, z) /= 5.0) then
          test = .false.
          fail_x = x
          fail_y = y
          fail_z = z
        end if
        if (test .and. f(x, y, z) /= 6.0) then
          test = .false.
          fail_x = x
          fail_y = y
          fail_z = z
        end if
      end do
    end do
  end do

  if (test) then
    write(output_unit, *) "test ok"
  else
    write(output_unit, *) "test failed"
    ! Report the arrays that were actually checked (e and f); the labels
    ! previously did not match the printed arrays (c and d).
    write(output_unit, *) "fails at", fail_x, fail_y, fail_z, &
        "E:", e(fail_x, fail_y, fail_z), "F:", f(fail_x, fail_y, fail_z)
    stop 2
  end if
  stop
end program main