szaghi/bench_select_if_goto.md

## bench_select_if_goto.md

      
    Raw
  

              bench_select_if_goto.md
            
          
    Aims

Compare the performance of select case, if elseif and goto branching flows.
Benchmark program

!> Benchmark comparing three equivalent ways of dispatching on an integer key:
!> `select case`, an `if`/`else if` chain and a computed `goto`.
!>
!> For each of `tests_number` iterations a key is drawn uniformly from {1, 2, 3}
!> and the matching worker is invoked once through each construct. The time
!> spent in each construct (dispatch + worker) is measured with `system_clock`
!> and accumulated in seconds; the per-iteration averages are printed at the end
!> together with the observed key distribution.
program bench_select_if_goto
  use, intrinsic :: iso_fortran_env, only: int32, int64, real64
  implicit none
  integer(int32), parameter :: tests_number = 10000  ! number of timed iterations; also scales the workers' array size
  integer(int32)            :: keyword               ! branch selector, always in 1..3
  real(real64), allocatable :: key_work(:)           ! scratch array (re)allocated by the workers
  real(real64)              :: random                ! uniform deviate in [0, 1)
  integer(int64)            :: profiling(1:2)        ! clock counts: (1) start, (2) stop
  integer(int64)            :: count_rate            ! clock counts per second
  real(real64)              :: system_clocks(1:3)    ! accumulated seconds: (1) select case, (2) if elseif, (3) goto
  integer(int32)            :: key_registers(1:3)    ! how many times each key was drawn
  integer(int32)            :: i

  key_registers = 0
  system_clocks = 0._real64
  do i = 1, tests_number
    call random_number(random)
    ! Map [0, 1) onto {1, 2, 3} with equal probability by truncation.
    ! Rounding with nint(random*3) would instead produce 0..3, with 0 and 3
    ! each half as likely as 1 and 2, and key 0 would dispatch to no worker.
    ! The min() is a defensive clamp in case 3*random ever rounds up to 3.
    keyword = min(1_int32 + int(3._real64*random, int32), 3_int32)
    key_registers(keyword) = key_registers(keyword) + 1

    ! --- select case -------------------------------------------------------
    call system_clock(profiling(1), count_rate)
    select case (keyword)
    case (1)
      call worker1(key=keyword, array=key_work)
    case (2)
      call worker2(key=keyword, array=key_work)
    case (3)
      call worker3(key=keyword, array=key_work)
    end select
    call system_clock(profiling(2), count_rate)
    system_clocks(1) = system_clocks(1) + real(profiling(2) - profiling(1), kind=real64)/count_rate

    ! --- if / else if ------------------------------------------------------
    call system_clock(profiling(1), count_rate)
    if (keyword == 1) then
      call worker1(key=keyword, array=key_work)
    else if (keyword == 2) then
      call worker2(key=keyword, array=key_work)
    else if (keyword == 3) then
      call worker3(key=keyword, array=key_work)
    end if
    call system_clock(profiling(2), count_rate)
    system_clocks(2) = system_clocks(2) + real(profiling(2) - profiling(1), kind=real64)/count_rate

    ! --- computed goto (obsolescent; kept because it is what is measured) ---
    call system_clock(profiling(1), count_rate)
    goto (10, 20, 30), keyword
    goto 40  ! reached only if keyword is outside 1..3
    10 call worker1(key=keyword, array=key_work)
    goto 40
    20 call worker2(key=keyword, array=key_work)
    goto 40
    30 call worker3(key=keyword, array=key_work)
    40 continue
    call system_clock(profiling(2), count_rate)
    system_clocks(3) = system_clocks(3) + real(profiling(2) - profiling(1), kind=real64)/count_rate
  end do
  print '(A,3F12.5)', ' keywords distribution (1,2,3):   ', real(key_registers, kind=real64)/tests_number
  print '(A,E23.15)', ' select case average performance: ', system_clocks(1)/tests_number
  print '(A,E23.15)', ' if elseif   average performance: ', system_clocks(2)/tests_number
  print '(A,E23.15)', ' goto        average performance: ', system_clocks(3)/tests_number

  contains
    ! The three workers below are intentionally identical: each branch of the
    ! benchmark needs its own distinct call target doing the same work.

    !> Workload for key 1: allocate `array` with `key*tests_number` elements
    !> and fill element j with `key**2 * tests_number * j`.
    !> `array` is intent(out), so any previous allocation is released on entry.
    pure subroutine worker1(key, array)
      integer(int32),            intent(in)  :: key
      real(real64), allocatable, intent(out) :: array(:)
      integer(int32)                         :: j

      allocate(array(1:key*tests_number))
      array = 0._real64
      do j = 1, key*tests_number
        array(j) = key**2._real64 * tests_number * j
      end do
    end subroutine worker1

    !> Workload for key 2: allocate `array` with `key*tests_number` elements
    !> and fill element j with `key**2 * tests_number * j`.
    !> `array` is intent(out), so any previous allocation is released on entry.
    pure subroutine worker2(key, array)
      integer(int32),            intent(in)  :: key
      real(real64), allocatable, intent(out) :: array(:)
      integer(int32)                         :: j

      allocate(array(1:key*tests_number))
      array = 0._real64
      do j = 1, key*tests_number
        array(j) = key**2._real64 * tests_number * j
      end do
    end subroutine worker2

    !> Workload for key 3: allocate `array` with `key*tests_number` elements
    !> and fill element j with `key**2 * tests_number * j`.
    !> `array` is intent(out), so any previous allocation is released on entry.
    pure subroutine worker3(key, array)
      integer(int32),            intent(in)  :: key
      real(real64), allocatable, intent(out) :: array(:)
      integer(int32)                         :: j

      allocate(array(1:key*tests_number))
      array = 0._real64
      do j = 1, key*tests_number
        array(j) = key**2._real64 * tests_number * j
      end do
    end subroutine worker3
end program bench_select_if_goto
Results

Compiler


gcc version 6.2.0 20160901

CPU & RAM


Intel(R) Atom(TM) CPU N270   @ 1.60GHz
1 GB (ddr?)

Average performances


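| compiling flags | tests dimension | select case | if elseif | goto |
|-----------------|-----------------|-------------|-----------|------|
| -Og | 10000 | 0.28344 × 10^-2 | 0.28359 × 10^-2 | 0.28360 × 10^-2 |
| -O3 | 10000 | 0.24082 × 10^-3 | 0.24021 × 10^-3 | 0.24011 × 10^-3 |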


Benchmarks output

stefano@hulk(07:39 AM Sun Oct 16)
~ 17 files, 1.7Mb
→ gfortran -Og bench_select_if_goto.f90 

stefano@hulk(07:39 AM Sun Oct 16)
~ 17 files, 1.7Mb
→ a.out 
 keywords distribution (1,2,3):        0.32520     0.33510     0.17250
 select case average performance:   0.283447384540006E-02
 if elseif   average performance:   0.283594608050004E-02
 goto        average performance:   0.283605782170003E-02

stefano@hulk(07:40 AM Sun Oct 16)
~ 17 files, 1.7Mb
→ gfortran -O3 bench_select_if_goto.f90 

stefano@hulk(07:41 AM Sun Oct 16)
~ 17 files, 1.7Mb
→ a.out 
 keywords distribution (1,2,3):        0.32520     0.33510     0.17250
 select case average performance:   0.240821688500007E-03
 if elseif   average performance:   0.240216072400011E-03
 goto        average performance:   0.240117276600013E-03
Conclusions

For this test case the computed goto does not seem to provide any meaningful speedup. Consider that

select case helps in writing clear code that should be easier to understand and maintain;
computed goto is obsolescent in the Fortran standard;
select case works with integer, logical, AND CHARACTER scalar expressions whereas computed goto only uses scalar numeric expressions that may be converted to integer type.

Readers should keep the last point in mind: to dispatch on a non-numeric value with a computed goto, a coder would need to take some extra action (a lookup table mapping the values to integers, perhaps).
compiling flags	tests dimension	select case	if elseif	goto
-Og	10000	0.28344 10^-2	0.28359 10^-2	0.28360 10^-2
-O3	10000	0.24082 10^-3	0.24021 10^-3	0.24011 10^-3