Created
June 13, 2010 01:50
-
-
Save thinkerbot/436253 to your computer and use it in GitHub Desktop.
optimization of Rack::Utils.escape_html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'test/unit' | |
require 'benchmark' | |
# This benchmark compares the speed of several variations of the escape_html | |
# method in Rack::Utils (4bfb6fc2768312e8899759255904463b781a10cd) | |
# | |
# # Escape ampersands, brackets and quotes to their HTML/XML entities. | |
# def escape_html(string) | |
# string.to_s.gsub("&", "&"). | |
# gsub("<", "<"). | |
# gsub(">", ">"). | |
# gsub("'", "'"). | |
# gsub('"', """) | |
# end | |
# | |
# The benchmark shows the method can be optimized on at least several | |
# implementations (MRI, JRuby, Rubinius) of ruby by: | |
# | |
# * using regexp instead of strings -- /&/ vs "&" | |
# * using a case statement in a single gsub block instead of multiple gsubs | |
# * using a hash lookup in a single gsub block instead of multiple gsubs | |
# | |
# As an example of the output (MRI-1.8.7): | |
# | |
# Loaded suite gsub_vs_benchmark | |
# Started | |
# . user system total real | |
# gsub_chain: short (1-tail) 0.260000 0.010000 0.270000 ( 0.273159) | |
# optimized_chain: short (1-tail) 0.060000 0.000000 0.060000 ( 0.061465) | |
# block_plus_case: short (1-tail) 0.060000 0.000000 0.060000 ( 0.059225) | |
# block_plus_hash: short (1-tail) 0.050000 0.000000 0.050000 ( 0.042501) | |
# user system total real | |
# gsub_chain: long (3-head) 0.300000 0.010000 0.310000 ( 0.316970) | |
# optimized_chain: long (3-head) 0.100000 0.000000 0.100000 ( 0.101731) | |
# block_plus_case: long (3-head) 0.100000 0.000000 0.100000 ( 0.097080) | |
# block_plus_hash: long (3-head) 0.090000 0.000000 0.090000 ( 0.085789) | |
# | |
# [...] | |
# | |
# Total Time (s) | |
# gsub_chain: 2.25 | |
# optimized_chain: 0.530000000000001 | |
# block_plus_case: 0.48 | |
# block_plus_hash: 0.38 | |
# . | |
# Finished in 4.424738 seconds. | |
# | |
# 2 tests, 24 assertions, 0 failures, 0 errors | |
# | |
class GsubVsBenchmark < Test::Unit::TestCase | |
# short/long string with (n) substitutions | |
# head -- an 'early' substitution | |
# tail -- a 'late' substitution | |
STRINGS = { | |
"short (0)" => "abc", | |
"long (0)" => "abcdefghijklmnpqrstuvwxyz", | |
"short (1-head)" => "a&c", | |
"long (1-head)" => "abcdefghijk&mnpqrstuvwxyz", | |
"long (3-head)" => "a&cdefghijk&mnpqrstuvwx&z", | |
"short (1-tail)" => "a\"c", | |
"long (1-tail)" => "abcdefghijk\"mnpqrstuvwxyz", | |
"long (3-tail)" => "a\"cdefghijk\"mnpqrstuvwx\"z" | |
} | |
def gsub_chain(string) | |
string.to_s.gsub("&", "&"). | |
gsub("<", "<"). | |
gsub(">", ">"). | |
gsub("'", "'"). | |
gsub('"', """) | |
end | |
def optimized_chain(string) | |
string.to_s.gsub(/&/, "&"). | |
gsub(/</, "<"). | |
gsub(/>/, ">"). | |
gsub(/'/, "'"). | |
gsub(/"/, """) | |
end | |
def block_plus_case(string) | |
string.to_s.gsub(/[&<>'"]/) do |c| | |
case c | |
when "&" then "&" | |
when "<" then "<" | |
when ">" then ">" | |
when "'" then "'" | |
when '"' then """ | |
end | |
end | |
end | |
HASH = { | |
"&" => "&", | |
"<" => "<", | |
">" => ">", | |
"'" => "'", | |
'"' => """ | |
} | |
def block_plus_hash(string) | |
string.to_s.gsub(/[&<>'"]/) do |c| | |
HASH[c] | |
end | |
end | |
BLOCK = lambda do |c| | |
HASH[c] | |
end | |
def block_plus_hblk(string) | |
string.to_s.gsub(/[&<>'"]/, &BLOCK) | |
end | |
def test_equality_of_methods | |
STRINGS.each_pair do |desc, string| | |
expected = gsub_chain(string) | |
assert_equal expected, optimized_chain(string), "optimized_chain: #{desc}" | |
assert_equal expected, block_plus_case(string), "block_plus_case: #{desc}" | |
assert_equal expected, block_plus_hash(string), "block_plus_hash: #{desc}" | |
assert_equal expected, block_plus_hblk(string), "block_plus_hblk: #{desc}" | |
end | |
end | |
def test_speed_of_methods | |
n = 10000 | |
# primer -- allows implementations like JRuby/Rubinius to warm up | |
primer_str = %q{abc&<>'"} | |
n.times { gsub_chain(primer_str) } | |
n.times { optimized_chain(primer_str) } | |
n.times { block_plus_case(primer_str) } | |
n.times { block_plus_hash(primer_str) } | |
n.times { block_plus_hblk(primer_str) } | |
results = [] | |
STRINGS.each_pair do |desc, string| | |
Benchmark.bm(35) do |x| | |
a = x.report("gsub_chain: #{desc}") do | |
n.times { gsub_chain(string) } | |
end | |
b = x.report("optimized_chain: #{desc}") do | |
n.times { optimized_chain(string) } | |
end | |
c = x.report("block_plus_case: #{desc}") do | |
n.times { block_plus_case(string) } | |
end | |
d = x.report("block_plus_hash: #{desc}") do | |
n.times { block_plus_hash(string) } | |
end | |
e = x.report("block_plus_hblk: #{desc}") do | |
n.times { block_plus_hblk(string) } | |
end | |
results << [a, b, c, d, e] | |
end | |
end | |
totals = results.transpose.collect do |times| | |
times.inject(0) {|sum, time| sum + time.total } | |
end | |
puts | |
puts "Total Time (s)" | |
puts " gsub_chain: #{totals[0]}" | |
puts " optimized_chain: #{totals[1]}" | |
puts " block_plus_case: #{totals[2]}" | |
puts " block_plus_hash: #{totals[3]}" | |
puts " block_plus_hblk: #{totals[4]}" | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment