Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save 284km/62435bca1f829e602dc183a84e467b97 to your computer and use it in GitHub Desktop.
Save 284km/62435bca1f829e602dc183a84e467b97 to your computer and use it in GitHub Desktop.
StringScanner#scan が思ったより遅いことについて考えていた.md
# $ ruby strscan_split.rb
# Warming up --------------------------------------
#                split    14.000  i/100ms
#              strscan     4.000  i/100ms
#   strscan without <<     5.000  i/100ms
# strscan w/o << regex     5.000  i/100ms
#    strscan w/o split     4.000  i/100ms
#            strscan.*    32.000  i/100ms
# Calculating -------------------------------------
#                split    133.139  (± 4.5%) i/s -    672.000  in   5.056626s
#              strscan     40.042  (±15.0%) i/s -    196.000  in   5.045718s
#   strscan without <<     54.703  (± 3.7%) i/s -    275.000  in   5.037360s
# strscan w/o << regex     50.874  (± 3.9%) i/s -    255.000  in   5.021319s
#    strscan w/o split     49.357  (± 6.1%) i/s -    248.000  in   5.053598s
#            strscan.*    320.915  (± 5.9%) i/s -      1.600k in   5.003380s
#
# Comparison:
#            strscan.*:      320.9 i/s
#                split:      133.1 i/s - 2.41x  slower
#   strscan without <<:       54.7 i/s - 5.87x  slower
# strscan w/o << regex:       50.9 i/s - 6.31x  slower
#    strscan w/o split:       49.4 i/s - 6.50x  slower
#              strscan:       40.0 i/s - 8.01x  slower
#
# ruby strscan_split.rb  41.28s user 0.79s system 98% cpu 42.646 total


require 'benchmark/ips'
require "strscan"

# Synthetic CSV-like input shared by every benchmark below: 1000
# CRLF-terminated rows, each holding 50 unquoted "AAAAA" fields joined by
# commas.
n_rows = 1_000
alphas = Array.new(50, "AAAAA")
unquoted = "#{alphas.join(",")}\r\n" * n_rows

# Compares several ways of breaking `unquoted` into fields: plain
# String#split versus StringScanner variants, each of which removes one piece
# of per-field work relative to the previous one. Every report collects its
# results into a fresh local array so that allocation cost is included.
Benchmark.ips do |bench|
  # Baseline: split into lines on CRLF, then split each line on commas.
  bench.report("split") do
    fields = []
    unquoted.split(/\r\n/).each do |line|
      line.split(',').each { |field| fields << field }
    end
  end

  # One scanner per line; each field is scanned, pushed, and then has the
  # following separator appended to it in place.
  bench.report("strscan") do
    fields = []
    unquoted.split(/\r\n/).each do |line|
      scanner = StringScanner.new(line)
      until scanner.eos?
        fields << scanner.scan(/[^,]+/)
        # `to_s` turns the nil from a failed comma scan (end of line) into "".
        fields.last << scanner.scan(/,/).to_s
      end
    end
  end

  # Same as "strscan", but the comma is consumed and thrown away instead of
  # being appended to the field.
  bench.report("strscan without <<") do
    fields = []
    unquoted.split(/\r\n/).each do |line|
      scanner = StringScanner.new(line)
      until scanner.eos?
        fields << scanner.scan(/[^,]+/)
        scanner.scan(/,/)
      end
    end
  end

  # Same as "strscan without <<", with both patterns held in locals defined
  # outside the loops rather than written as literals at the call sites.
  bench.report("strscan w/o << regex") do
    fields = []
    field_re = /[^,]+/
    comma_re = /,/
    unquoted.split(/\r\n/).each do |line|
      scanner = StringScanner.new(line)
      until scanner.eos?
        fields << scanner.scan(field_re)
        scanner.scan(comma_re)
      end
    end
  end

  # A single scanner over the whole input: no String#split at all, so the
  # CRLF terminator is consumed by a third pattern.
  bench.report("strscan w/o split") do
    fields = []
    field_re = /[^,\r\n]+/
    comma_re = /,/
    crlf_re = /\r\n/
    scanner = StringScanner.new(unquoted)
    until scanner.eos?
      fields << scanner.scan(field_re)
      scanner.scan(comma_re)
      scanner.scan(crlf_re)
    end
  end

  # One scan call per line that swallows the entire line, giving a measure
  # of per-line cost with only a single `scan` per scanner.
  bench.report("strscan.*") do
    fields = []
    unquoted.split(/\r\n/).each do |line|
      scanner = StringScanner.new(line)
      until scanner.eos?
        fields << scanner.scan(/.+/)
      end
    end
  end

  bench.compare!
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment