Dear all,
I have a small test program (see below) that compares three ways of invoking procedures on a derived type: via generic functions, via type-bound procedures called on a type instance, and via type-bound procedures called on a class instance. With most other compilers I have checked, the first two call types are one order of magnitude faster (ca. 0.3 s) than the third one (ca. 3 s). The current pgfortran (17.10), however, produces a binary in which all three are equally slow (ca. 3 s). I am wondering whether any compiler flag can speed it up (I have tried -O3), or whether this is an internal optimisation problem.
! Two minimal counter types that hold a single integer and expose the same
! two operations (increment, read) through different call mechanisms:
!   * TStatic - operated on through the generic names incValue / getValue,
!   * TPoly   - operated on through type-bound procedures.
! Neither type performs any overflow checking on its counter.
module testmod
  implicit none

  ! Counter without type-bound procedures; a fresh instance holds 1.
  type :: TStatic
    private
    integer :: val = 1
  end type TStatic

  ! Counter with type-bound procedures; a fresh instance holds 1.
  type :: TPoly
    private
    integer :: val = 1
  contains
    procedure :: incValue => TPoly_incValue
    procedure :: getValue => TPoly_getValue
  end type TPoly

  ! Generic name for adding an increment to a TStatic counter.
  interface incValue
    module procedure TStatic_incValue
  end interface incValue

  ! Generic name for reading the current value of a TStatic counter.
  interface getValue
    module procedure TStatic_getValue
  end interface getValue

contains

  ! Adds increment to the counter stored in a TStatic instance.
  !   this      - instance whose counter is updated in place
  !   increment - amount to add (may be negative)
  subroutine TStatic_incValue(this, increment)
    type(TStatic), intent(inout) :: this
    integer, intent(in) :: increment

    this%val = increment + this%val
  end subroutine TStatic_incValue

  ! Returns the counter currently stored in a TStatic instance.
  !   this - instance to query
  integer function TStatic_getValue(this) result(current)
    type(TStatic), intent(in) :: this

    current = this%val
  end function TStatic_getValue

  ! Adds increment to the counter stored in a TPoly instance; bound to the
  ! type as incValue.
  !   this      - passed-object dummy, updated in place
  !   increment - amount to add (may be negative)
  subroutine TPoly_incValue(this, increment)
    class(TPoly), intent(inout) :: this
    integer, intent(in) :: increment

    this%val = increment + this%val
  end subroutine TPoly_incValue

  ! Returns the counter currently stored in a TPoly instance; bound to the
  ! type as getValue.
  !   this - passed-object dummy to query
  integer function TPoly_getValue(this) result(current)
    class(TPoly), intent(in) :: this

    current = this%val
  end function TPoly_getValue

end module testmod
! Micro-benchmark that performs the same counter increment nCycles times
! through three different call mechanisms and prints, for each of them, the
! final counter value and the CPU time spent in the loop:
!   1. generic interface on a type(TStatic) variable,
!   2. type-bound procedure on a type(TPoly) variable,
!   3. type-bound procedure on an allocatable class(TPoly) variable.
!
! NOTE(review): every counter starts at 1 and receives 1 + 2 + ... + nCycles,
! i.e. roughly 5e17 in total, which exceeds the range of a 32-bit integer.
! If the default integer kind is 32 bits wide, the printed counter values
! depend on processor-specific overflow behaviour; only the timings are
! meaningful.
program test
  use testmod, only: TStatic, TPoly, incValue, getValue
  implicit none

  ! Number of increment calls performed in each of the three loops (1e9).
  integer, parameter :: nCycles = 1000000000

  type(TStatic) :: staticInst
  type(TPoly) :: polyInst, polyInst2
  class(TPoly), allocatable :: classInst
  integer :: ii
  real :: tStart, tEnd

  print '(A,I0)', 'Nr. of iterations:', nCycles

  ! 1. Non-polymorphic type, called through the generic interface.
  call cpu_time(tStart)
  do ii = 1, nCycles
    call incValue(staticInst, ii)
  end do
  call cpu_time(tEnd)
  print '(A,T30,I0,F6.2)', 'Static:', getValue(staticInst), tEnd - tStart

  ! 2. Type-bound procedure on a variable whose dynamic type is fixed.
  call cpu_time(tStart)
  do ii = 1, nCycles
    call polyInst%incValue(ii)
  end do
  call cpu_time(tEnd)
  print '(A,T30,I0,F6.2)', 'Polymorphic via type:', polyInst%getValue(), tEnd - tStart

  ! 3. Type-bound procedure on a polymorphic variable. The instance is
  !    cloned from the untouched polyInst2 so it starts from the default
  !    counter value; the allocation happens outside the timed region.
  allocate(classInst, source=polyInst2)
  call cpu_time(tStart)
  do ii = 1, nCycles
    call classInst%incValue(ii)
  end do
  call cpu_time(tEnd)
  print '(A,T30,I0,F6.2)', 'Polymorphic via class:', classInst%getValue(), tEnd - tStart
end program test