Hi,
The following sample program fails to compile with the message "unsupported operation: ARDF" when it uses the function version (sm). The subroutine version (sm1) works fine, though.
! Reproducer: calls the internal function sm on an array section from inside
! an OpenACC parallel loop. The equivalent call through the subroutine sm1 is
! kept below, commented out, for comparison.
program test
implicit none
! xx is the input array; val(i,j) receives the sum of xx(i,1:20,j).
real, dimension(30,30,30) :: xx
real, dimension(30,30) :: val
!$acc declare create(xx,val)
! NOTE(review): v is declared but never referenced in the main program.
real:: v
integer:: i,j
! Initialise both arrays inside an OpenACC kernels region.
!$acc kernels
xx(:,:,:) = 1.0
val(:,:) = 0.0
!$acc end kernels
! Function variant: every (i,j) iteration passes the section xx(i,1:20,j),
! which is strided in the middle dimension, to sm.
!$acc parallel loop collapse(2) independent
do i = 1,30
do j = 1,30
val(i,j) = sm(xx(i,1:20,j))
enddo
enddo
!$acc end parallel
! Subroutine variant of the same loop (disabled).
!!$acc parallel loop collapse(2) independent
! do i = 1,30
! do j = 1,30
! call sm1(xx(i,1:20,j),val(i,j))
! enddo
! enddo
!!$acc end parallel
! Bring val back to the host before printing it.
!$acc update host(val)
print*, val
contains
! Return the sum of the 20 elements of xx.
!
! Marked as an OpenACC vector routine so it can be called from within a
! compute region.
!   xx     : input values, explicit shape of length 20
!   result : xx(1) + xx(2) + ... + xx(20)
function sm(xx) result(total)
!$acc routine vector
  implicit none
  integer, parameter :: nvals = 20
  real, intent(in) :: xx(nvals)
  ! The function result is not a dummy argument, so it must not carry an
  ! intent attribute.
  real :: total
  ! Local accumulator; deliberately not named "sum" so the intrinsic of that
  ! name is not shadowed.
  real :: partial
  integer :: k
  partial = 0.0
  do k = 1, nvals
    partial = partial + xx(k)
  end do
  total = partial
end function sm
! Subroutine form of the 20-element summation: stores the total in v.
!   xx : input values, explicit shape of length 20
!   v  : on return, xx(1) + xx(2) + ... + xx(20)
subroutine sm1(xx,v)
!$acc routine vector
  implicit none
  real, intent(in)  :: xx(20)
  real, intent(out) :: v
  real    :: running
  integer :: idx
  ! Accumulate into a local first and assign the output argument once.
  running = 0.0
  do idx = 1, 20
    running = running + xx(idx)
  end do
  v = running
end subroutine sm1
end program test
Compilation command and output:
$ pgf90 -acc -Minfo=accel -ta=tesla,cc60,cc70,cuda9.0 oo.F90
PGF90-S-0155-Accelerator region ignored; see -Minfo messages (oo.F90: 14)
test:
5, Generating create(xx(:,:,:),val(:,:))
10, Loop is parallelizable
Accelerator kernel generated
Generating Tesla code
10, !$acc loop gang, vector(4) ! blockidx%z threadidx%y
!$acc loop gang, vector(32) ! blockidx%x threadidx%x
!$acc loop gang ! blockidx%y
11, Loop is parallelizable
Accelerator kernel generated
Generating Tesla code
11, !$acc loop gang, vector(4) ! blockidx%y threadidx%y
!$acc loop gang, vector(32) ! blockidx%x threadidx%x
14, Accelerator region ignored
16, Accelerator restriction: an unsupported operation was found
17, Accelerator restriction: unsupported operation: ARDF
0 inform, 0 warnings, 1 severes, 0 fatal for test
sm:
33, Generating Tesla code
43, !$acc loop vector ! threadidx%x
43, Loop is parallelizable
sm1:
49, Generating Tesla code
59, !$acc loop vector ! threadidx%x
59, Loop is parallelizable
Daniel