Please test the following code with pgfortran V11.4.
program test
  ! Host driver: launches the functest kernel with one block of n threads,
  ! copies the device array back to the host and prints it on unit 0.
  use kernel
  implicit none

  integer, parameter :: n = 10
  double precision, device :: dev_vals(n)   ! filled by the kernel
  double precision         :: host_vals(n)  ! host copy used for printing

  call functest<<<1, n>>>(dev_vals)

  ! Assignment from a device array to a host array transfers the data.
  host_vals = dev_vals

  write(0,*) host_vals
end program test
! Device code: the functest kernel and the device function func that it calls.
MODULE kernel
CONTAINS
! Kernel: each thread writes 2.d0*func(s), with s starting at 0.d0, into the
! element of output selected by its thread index (threadidx%x).
attributes(global) SUBROUTINE functest(output)
IMPLICIT NONE
double precision :: output(:)
double precision :: s
s = 0.d0
! func is referenced exactly once in this statement. It increments s from
! 0.d0 to 1.d0 and returns 1.d0, so the stored value should be 2.d0.
! Because func modifies s, evaluating it more than once changes the result.
output(threadidx%x) = 2.d0*func(s)
END SUBROUTINE functest
!===============================================================================
attributes(device) FUNCTION func(s)
! Device function with a side effect: adds 1.d0 to s in place and returns the
! updated value of s.
IMPLICIT NONE
! INTENT(INOUT): the caller's variable is modified on every call.
double precision, INTENT(INOUT) :: s
double precision :: func
s = s+1.d0
func = s
RETURN
END FUNCTION func
END MODULE kernel
Compiled with "pgfortran -Mcuda test.F90".
The output (every element is 3.0, although 2.d0*func(s) with s = 0.d0 should give 2.0):
3.000000000000000 3.000000000000000 3.000000000000000
3.000000000000000 3.000000000000000 3.000000000000000
3.000000000000000 3.000000000000000 3.000000000000000
3.000000000000000
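Analysis:
The statement
output(threadidx%x) = 2.d0*func(s)
is translated in the generated .gpu file into
output[threadidx%x] = func(s)+func(s)
so func is called twice instead of once. Because func increments s on every call, the two calls return 1.0 and 2.0, and the result is 3.0 instead of the expected 2.0.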