Skip to content

Instantly share code, notes, and snippets.

@h-lame
Created July 20, 2017 13:55
Show Gist options
  • Save h-lame/e5ec2e5dcfa58abaccb693c28ed28f45 to your computer and use it in GitHub Desktop.
Save h-lame/e5ec2e5dcfa58abaccb693c28ed28f45 to your computer and use it in GitHub Desktop.
Benchmarking some alternate implementations for checking if a BOM is present in a string and removing it.
require 'benchmark'
# Fixture for the benchmarks: "bom.css" should be a largish CSS file that is
# representative of what this code will meet in the real world. The original
# measurements used a compiled application.css from
# https://github.com/alphagov/government-frontend
# The file is read with binread, i.e. as raw bytes rather than decoded text.
css = File.binread("bom.css")
# Candidate implementations for detecting and stripping a UTF-8 byte order
# mark (BOM), kept side by side so they can be benchmarked against each other.
#
# Conventions shared by the methods below:
# * Variants without +dup+ in their name call +force_encoding+ on the argument
#   itself, so the caller's string is re-tagged (and, for the bang variants,
#   modified in place).
# * The slice, char-index and pack removers do not look for a BOM; they drop
#   the first character (or the first three bytes) unconditionally and are
#   only meaningful for input already known to start with one.
# * The +sub+ / +sub!+ removers only strip a BOM that is actually present.
class BomBench
  # The BOM as a one-character UTF-8 string.
  BOM = "\xEF\xBB\xBF".freeze
  # The BOM as raw byte values.
  BOM_BYTES = [0xEF, 0xBB, 0xBF].freeze
  # Anchored with \A (start of string). `^` would also match at the start of
  # any later line, misreporting or removing a U+FEFF that is not a BOM.
  BOM_AT_START = /\A\xEF\xBB\xBF/.freeze

  # Re-tags +input+ as UTF-8 and deletes its first character in place.
  #
  # @param input [String] string starting with a BOM; modified in place
  # @return [String] +input+ itself, without its first character
  def remove_bom_force_encoding_slice!(input)
    input.force_encoding('UTF-8').slice!(0)
    input
  end

  # Re-tags +input+ as UTF-8 and returns everything after the first character.
  #
  # @param input [String] string starting with a BOM; its encoding is changed
  # @return [String] a new string without the first character
  def remove_bom_force_encoding_slice(input)
    input.force_encoding('UTF-8').slice(1..-1)
  end

  # Same as #remove_bom_force_encoding_slice, spelled with String#[].
  #
  # @param input [String] string starting with a BOM; its encoding is changed
  # @return [String] a new string without the first character
  def remove_bom_force_encoding_char_index(input)
    input.force_encoding('UTF-8')[1..-1]
  end

  # Drops the first three bytes by round-tripping through an array of bytes.
  #
  # @param input [String] string starting with the three BOM bytes
  # @return [String] a new UTF-8 tagged string built from the remaining bytes
  def remove_bom_pack(input)
    input.bytes[3..-1].pack('c*').force_encoding('UTF-8')
  end

  # Re-tags +input+ as UTF-8 and returns a copy with a leading BOM removed.
  #
  # @param input [String] string to clean; its encoding is changed
  # @return [String] a new string; content is unchanged if there is no BOM
  def remove_bom_force_encoding_sub(input)
    input.force_encoding('UTF-8').sub(BOM_AT_START, '')
  end

  # Re-tags +input+ as UTF-8 and removes a leading BOM in place.
  #
  # @param input [String] string to clean; modified in place
  # @return [String] +input+ itself
  def remove_bom_force_encoding_sub!(input)
    # sub! answers nil when nothing matched, so hand back the string itself to
    # stay consistent with the non-bang variant.
    input.force_encoding('UTF-8').sub!(BOM_AT_START, '')
    input
  end

  # Works on a UTF-8 tagged copy and deletes the copy's first character.
  #
  # @param input [String] string starting with a BOM; left untouched
  # @return [String] the copy, without its first character
  def remove_bom_dup_force_encoding_slice!(input)
    out = input.dup.force_encoding('UTF-8')
    out.slice!(0)
    out
  end

  # Works on a UTF-8 tagged copy and returns everything after the first
  # character.
  #
  # @param input [String] string starting with a BOM; left untouched
  # @return [String] a new string without the first character
  def remove_bom_dup_force_encoding_slice(input)
    # String#slice is non-destructive, so its result is the return value;
    # returning the copy itself would hand back the BOM as well.
    input.dup.force_encoding('UTF-8').slice(1..-1)
  end

  # Same as #remove_bom_dup_force_encoding_slice, spelled with String#[].
  #
  # @param input [String] string starting with a BOM; left untouched
  # @return [String] a new string without the first character
  def remove_bom_dup_force_encoding_char_index(input)
    input.dup.force_encoding('UTF-8')[1..-1]
  end

  # Works on a UTF-8 tagged copy and returns it with a leading BOM removed.
  #
  # @param input [String] string to clean; left untouched
  # @return [String] a new string; content is unchanged if there is no BOM
  def remove_bom_dup_force_encoding_sub(input)
    input.dup.force_encoding('UTF-8').sub(BOM_AT_START, '')
  end

  # Works on a UTF-8 tagged copy and removes a leading BOM from the copy.
  #
  # @param input [String] string to clean; left untouched
  # @return [String] the copy
  def remove_bom_dup_force_encoding_sub!(input)
    out = input.dup.force_encoding('UTF-8')
    # sub! answers nil when nothing matched, so return the copy explicitly.
    out.sub!(BOM_AT_START, '')
    out
  end

  # Matches the BOM pattern against the whole string, temporarily re-tagging
  # +input+ as UTF-8.
  #
  # @param input [String] string to inspect; its encoding is restored
  # @return [Integer, nil] 0 when +input+ starts with a BOM, otherwise nil
  def bom_string_force_encoding_reset_full_regexp?(input)
    with_encoding_force_encoding_reset(input, 'UTF-8') { |utf8| utf8 =~ BOM_AT_START }
  end

  # Matches the BOM pattern against a UTF-8 tagged copy of the whole string.
  #
  # @param input [String] string to inspect; left untouched
  # @return [Integer, nil] 0 when +input+ starts with a BOM, otherwise nil
  def bom_string_dup_force_encoding_full_regexp?(input)
    with_encoding_dup_force_encoding(input, 'UTF-8') { |utf8| utf8 =~ BOM_AT_START }
  end

  # Matches the BOM pattern against only the first three characters,
  # temporarily re-tagging +input+ as UTF-8.
  #
  # @param input [String] string to inspect; its encoding is restored
  # @return [Integer, nil] 0 when +input+ starts with a BOM, otherwise nil
  def bom_string_force_encoding_reset_slice_regexp?(input)
    with_encoding_force_encoding_reset(input, 'UTF-8') { |utf8| utf8[0..2] =~ BOM_AT_START }
  end

  # Matches the BOM pattern against only the first three characters of a
  # UTF-8 tagged copy.
  #
  # @param input [String] string to inspect; left untouched
  # @return [Integer, nil] 0 when +input+ starts with a BOM, otherwise nil
  def bom_string_dup_force_encoding_slice_regexp?(input)
    with_encoding_dup_force_encoding(input, 'UTF-8') { |utf8| utf8[0..2] =~ BOM_AT_START }
  end

  # Compares the first character with the BOM, temporarily re-tagging +input+
  # as UTF-8.
  #
  # @param input [String] string to inspect; its encoding is restored
  # @return [Boolean] true when the first character is the BOM
  def bom_string_force_encoding_reset_slice_equality?(input)
    with_encoding_force_encoding_reset(input, 'UTF-8') { |utf8| utf8[0] == BOM }
  end

  # Compares the first character of a UTF-8 tagged copy with the BOM.
  #
  # @param input [String] string to inspect; left untouched
  # @return [Boolean] true when the first character is the BOM
  def bom_string_dup_force_encoding_slice_equality?(input)
    with_encoding_dup_force_encoding(input, 'UTF-8') { |utf8| utf8[0] == BOM }
  end

  # Compares the first three raw bytes with the BOM bytes; the string's
  # encoding is never consulted or changed.
  #
  # @param input [String] string to inspect; left untouched
  # @return [Boolean] true when the first three bytes are EF BB BF
  def bom_string_byte_check?(input)
    with_encoding_byte_check(input, 'UTF-8') { |raw| raw.bytes[0..2] == BOM_BYTES }
  end

  # Yields +string+ re-tagged as +encoding+, then restores the encoding it had
  # on entry, even if the block raises.
  #
  # @param string [String] string to re-tag for the duration of the block
  # @param encoding [String, Encoding] encoding to apply while yielding
  # @return [Object] whatever the block returns
  def with_encoding_force_encoding_reset(string, encoding)
    old_encoding = string.encoding
    begin
      yield string.force_encoding(encoding)
    ensure
      string.force_encoding(old_encoding)
    end
  end

  # Yields a copy of +string+ tagged as +encoding+; +string+ is not modified.
  #
  # @param string [String] string to copy
  # @param encoding [String, Encoding] encoding to apply to the copy
  # @return [Object] whatever the block returns
  def with_encoding_dup_force_encoding(string, encoding)
    yield string.dup.force_encoding(encoding)
  end

  # Yields +string+ as is. The encoding argument is accepted only so the
  # signature lines up with the other with_encoding_* helpers.
  #
  # @param string [String] string handed to the block unchanged
  # @param _encoding [Object] ignored
  # @return [Object] whatever the block returns
  def with_encoding_byte_check(string, _encoding)
    yield string
  end
end
# Number of calls made to each candidate inside a single report.
iterations = 10_000
bench = BomBench.new

# BOM detection candidates. Each call is handed its own `css.dup`.
Benchmark.bmbm do |bm|
  bm.report("check: bytes") do
    iterations.times { bench.bom_string_byte_check?(css.dup) }
  end
  bm.report("check: dup + force_encoding + slice equality") do
    iterations.times { bench.bom_string_dup_force_encoding_slice_equality?(css.dup) }
  end
  bm.report("check: dup + force_encoding + slice regexp") do
    iterations.times { bench.bom_string_dup_force_encoding_slice_regexp?(css.dup) }
  end
  bm.report("check: dup + force_encoding + full regexp") do
    iterations.times { bench.bom_string_dup_force_encoding_full_regexp?(css.dup) }
  end
  bm.report("check: force_encoding + reset + slice equality") do
    iterations.times { bench.bom_string_force_encoding_reset_slice_equality?(css.dup) }
  end
  bm.report("check: force_encoding + reset + slice regexp") do
    iterations.times { bench.bom_string_force_encoding_reset_slice_regexp?(css.dup) }
  end
  bm.report("check: force_encoding + reset + full regexp") do
    iterations.times { bench.bom_string_force_encoding_reset_full_regexp?(css.dup) }
  end
end

# BOM removal candidates. Each call is handed its own `css.dup`, so the
# in-place variants never alter the shared fixture.
Benchmark.bmbm do |bm|
  bm.report("remove bom: pack") do
    iterations.times { bench.remove_bom_pack(css.dup) }
  end
  bm.report("remove bom: force encoding + slice!") do
    iterations.times { bench.remove_bom_force_encoding_slice!(css.dup) }
  end
  bm.report("remove bom: force encoding + slice") do
    iterations.times { bench.remove_bom_force_encoding_slice(css.dup) }
  end
  bm.report("remove bom: force encoding + char index") do
    iterations.times { bench.remove_bom_force_encoding_char_index(css.dup) }
  end
  bm.report("remove bom: force encoding sub!") do
    iterations.times { bench.remove_bom_force_encoding_sub!(css.dup) }
  end
  bm.report("remove bom: force encoding sub") do
    iterations.times { bench.remove_bom_force_encoding_sub(css.dup) }
  end
  bm.report("remove bom: dup + force encoding + slice!") do
    iterations.times { bench.remove_bom_dup_force_encoding_slice!(css.dup) }
  end
  bm.report("remove bom: dup + force encoding + slice") do
    iterations.times { bench.remove_bom_dup_force_encoding_slice(css.dup) }
  end
  bm.report("remove bom: dup + force encoding + char index") do
    iterations.times { bench.remove_bom_dup_force_encoding_char_index(css.dup) }
  end
  bm.report("remove bom: dup + force encoding sub!") do
    iterations.times { bench.remove_bom_dup_force_encoding_sub!(css.dup) }
  end
  bm.report("remove bom: dup + force encoding sub") do
    iterations.times { bench.remove_bom_dup_force_encoding_sub(css.dup) }
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment