This test is an attempt to investigate the abstraction overhead in modern Fortran. It may be neither well-posed nor interesting at all: it is just a replay of a question from a Google Group CLF thread.
The overhead.f90
test program contains
!> Problem sizes shared by the benchmark types and the driver program.
module constants
  implicit none
  private

  ! Integer literals are used instead of default-real literals (1e9, 1e3) so
  ! the values do not depend on an implicit real-to-integer conversion.

  !> Upper bound of the timed loops in the driver program (10**9).
  integer, parameter, public :: n = 1000000000
  !> Extent of the `x` array component of the benchmarked types (10**3).
  integer, parameter, public :: m = 1000
end module constants
!> Abstract parent type for the polymorphic side of the benchmark.
module base_t
  use constants, only : m
  implicit none
  private
  public :: Base

  !> Holds a real array of extent `m` and leaves `add` to be supplied by
  !> extending types.
  type, abstract :: Base
    real, dimension(m) :: x
  contains
    ! The passed-object dummy is the first argument (`self`) by default.
    procedure(base_add), deferred :: add
  end type Base

  abstract interface
    !> Signature that every concrete `add` binding has to match.
    subroutine base_add(self, ndx, val)
      import :: Base
      class(Base), intent(inout) :: self
      integer, intent(in) :: ndx
      real, intent(in) :: val
    end subroutine base_add
  end interface
end module base_t
!> Concrete extension of `Base`; the driver program reaches it through a
!> `class(Base)` object.
module derived_t
  use base_t, only : Base
  implicit none
  private
  public :: Derived

  !> Adds no components of its own; it only supplies the deferred `add` binding.
  type, extends(Base) :: Derived
  contains
    ! The passed-object dummy is the first argument (`self`) by default.
    procedure :: add => derived_add
  end type Derived

contains

  !> Store `val` in element `ndx` of the inherited array `x`.
  !> No bounds check is made on `ndx`.
  subroutine derived_add(self, ndx, val)
    class(Derived), intent(inout) :: self
    integer, intent(in) :: ndx
    real, intent(in) :: val

    self%x(ndx) = val
  end subroutine derived_add

end module derived_t
!> Stand-alone type (no parent, no deferred bindings) used as the baseline
!> case of the benchmark.
module solid_t
  use constants, only : m
  implicit none
  ! There is no `private` statement here, so every entity of this module
  ! keeps the default (public) accessibility.

  !> Holds a real array of extent `m` together with a type-bound `add`.
  type :: Solid
    real, dimension(m) :: x
  contains
    ! The passed-object dummy is the first argument (`self`) by default.
    procedure :: add => solid_add
  end type Solid

contains

  !> Store `val` in element `ndx` of the array `x`.
  !> No bounds check is made on `ndx`.
  subroutine solid_add(self, ndx, val)
    class(Solid), intent(inout) :: self
    integer, intent(in) :: ndx
    real, intent(in) :: val

    self%x(ndx) = val
  end subroutine solid_add

end module solid_t
!> Benchmark driver: times `n` calls of the type-bound `add` procedure, first
!> on a `type(Solid)` object and then on a `Derived` object held in a
!> `class(Base)` allocatable, and prints the elapsed seconds of each run as
!> measured with `system_clock`.
program overhead
  use base_t, only : Base
  use constants, only : m, n
  use derived_t, only : Derived
  use solid_t, only : Solid
  implicit none
  integer :: profiling(1:2)  ! clock counts at the start (1) and end (2) of a timed run
  integer :: count_rate      ! clock counts per second, as returned by system_clock

  call system_clock(profiling(1), count_rate)
  call test_solid()
  call system_clock(profiling(2), count_rate)
  call report("Solid, non abstract add")

  call system_clock(profiling(1), count_rate)
  call test_derived()
  call system_clock(profiling(2), count_rate)
  call report("Derived, abstract add")
  stop

contains

  !> Print `label` followed by the elapsed seconds between the two clock
  !> readings currently stored in `profiling`.
  subroutine report(label)
    character(len=*), intent(in) :: label

    print "(A)", label
    print*, real(profiling(2) - profiling(1))/count_rate
  end subroutine report

  !> Call `add` `n` times on a non-polymorphic `Solid` object, each time with
  !> a fresh random value and a random index.
  ! NOTE(review): `s%x` is written but never read afterwards, so an optimizing
  ! compiler might be able to drop part of this work - confirm against the
  ! generated code before trusting the timings.
  subroutine test_solid()
    integer :: i
    integer :: ndx
    real :: val
    type(Solid) :: s

    do i = 1, n
      call random_number(val)
      ! val lies in [0, 1), so int(m*val) + 1 covers 1..m uniformly. The
      ! previous max(int(m*val), 1) never produced index m and selected
      ! index 1 twice as often as any other. min() guards against m*val
      ! rounding up to m.
      ndx = min(int(m * val) + 1, m)
      call s%add(ndx, val)
    end do
  end subroutine test_solid

  !> Call `add` `n` times on a `Derived` object accessed through a
  !> `class(Base)` allocatable, each time with a fresh random value and a
  !> random index.
  ! NOTE(review): `d%x` is written but never read afterwards - see the note
  ! on test_solid.
  subroutine test_derived()
    integer :: i
    integer :: ndx
    real :: val
    class(Base), allocatable :: d

    allocate(Derived :: d)
    do i = 1, n
      call random_number(val)
      ! Same uniform 1..m index mapping as in test_solid, so both timed
      ! loops perform identical index arithmetic.
      ndx = min(int(m * val) + 1, m)
      call d%add(ndx, val)
    end do
  end subroutine test_derived

end program overhead
On a dual six-core Intel(R) Xeon(R) CPU X5650 @ 2.67GHz, the benchmarks show
Add type | GNU gfortran 5.3.0 | Intel ifort 15.0.3 |
---|---|---|
Non abstract | 7.21899986 | 7.515000 |
Abstract | 9.01000023 | 13.49110 |
For both the GNU and Intel compilers, the compilation flag was -O3,
which means different things for the two vendors, but in general enables a certain level of optimization.
Add type | GNU gfortran 5.3.0 | Intel ifort 15.0.3 |
---|---|---|
Non abstract | 18.9899998 | 27.53160 |
Abstract | 15.0279999 | 21.80600 |
For both the GNU and Intel compilers, the compilation flag was -O0,
which should disable optimizations.
For this (possibly naive) test, it seems that significant overheads occur in optimized builds.
Great work.
Thank you Stefano.