Skip to content

Instantly share code, notes, and snippets.

@thinkerbot
Created June 13, 2010 01:50
Show Gist options
  • Save thinkerbot/436253 to your computer and use it in GitHub Desktop.
Save thinkerbot/436253 to your computer and use it in GitHub Desktop.
optimization of Rack::Utils.escape_html
require 'test/unit'
require 'benchmark'
# This benchmark compares the speed of several variations of the escape_html
# method in Rack::Utils (4bfb6fc2768312e8899759255904463b781a10cd)
#
# # Escape ampersands, brackets and quotes to their HTML/XML entities.
# def escape_html(string)
# string.to_s.gsub("&", "&").
# gsub("<", "&lt;").
# gsub(">", "&gt;").
# gsub("'", "&#39;").
# gsub('"', "&quot;")
# end
#
# The benchmark shows the method can be optimized on at least several
# implementations (MRI, JRuby, Rubinius) of ruby by:
#
# * using regexp instead of strings -- /&/ vs "&"
# * using a case statement in a single gsub block instead of multiple gsubs
# * using a hash lookup in a single gsub block instead of multiple gsubs
#
# As an example of the output (MRI-1.8.7):
#
# Loaded suite gsub_vs_benchmark
# Started
# . user system total real
# gsub_chain: short (1-tail) 0.260000 0.010000 0.270000 ( 0.273159)
# optimized_chain: short (1-tail) 0.060000 0.000000 0.060000 ( 0.061465)
# block_plus_case: short (1-tail) 0.060000 0.000000 0.060000 ( 0.059225)
# block_plus_hash: short (1-tail) 0.050000 0.000000 0.050000 ( 0.042501)
# user system total real
# gsub_chain: long (3-head) 0.300000 0.010000 0.310000 ( 0.316970)
# optimized_chain: long (3-head) 0.100000 0.000000 0.100000 ( 0.101731)
# block_plus_case: long (3-head) 0.100000 0.000000 0.100000 ( 0.097080)
# block_plus_hash: long (3-head) 0.090000 0.000000 0.090000 ( 0.085789)
#
# [...]
#
# Total Time (s)
# gsub_chain: 2.25
# optimized_chain: 0.530000000000001
# block_plus_case: 0.48
# block_plus_hash: 0.38
# .
# Finished in 4.424738 seconds.
#
# 2 tests, 24 assertions, 0 failures, 0 errors
#
class GsubVsBenchmark < Test::Unit::TestCase
# short/long string with (n) substitutions
# head -- an 'early' substitution
# tail -- a 'late' substitution
STRINGS = {
"short (0)" => "abc",
"long (0)" => "abcdefghijklmnpqrstuvwxyz",
"short (1-head)" => "a&c",
"long (1-head)" => "abcdefghijk&mnpqrstuvwxyz",
"long (3-head)" => "a&cdefghijk&mnpqrstuvwx&z",
"short (1-tail)" => "a\"c",
"long (1-tail)" => "abcdefghijk\"mnpqrstuvwxyz",
"long (3-tail)" => "a\"cdefghijk\"mnpqrstuvwx\"z"
}
def gsub_chain(string)
string.to_s.gsub("&", "&amp;").
gsub("<", "&lt;").
gsub(">", "&gt;").
gsub("'", "&#39;").
gsub('"', "&quot;")
end
def optimized_chain(string)
string.to_s.gsub(/&/, "&amp;").
gsub(/</, "&lt;").
gsub(/>/, "&gt;").
gsub(/'/, "&#39;").
gsub(/"/, "&quot;")
end
def block_plus_case(string)
string.to_s.gsub(/[&<>'"]/) do |c|
case c
when "&" then "&amp;"
when "<" then "&lt;"
when ">" then "&gt;"
when "'" then "&#39;"
when '"' then "&quot;"
end
end
end
HASH = {
"&" => "&amp;",
"<" => "&lt;",
">" => "&gt;",
"'" => "&#39;",
'"' => "&quot;"
}
def block_plus_hash(string)
string.to_s.gsub(/[&<>'"]/) do |c|
HASH[c]
end
end
BLOCK = lambda do |c|
HASH[c]
end
def block_plus_hblk(string)
string.to_s.gsub(/[&<>'"]/, &BLOCK)
end
def test_equality_of_methods
STRINGS.each_pair do |desc, string|
expected = gsub_chain(string)
assert_equal expected, optimized_chain(string), "optimized_chain: #{desc}"
assert_equal expected, block_plus_case(string), "block_plus_case: #{desc}"
assert_equal expected, block_plus_hash(string), "block_plus_hash: #{desc}"
assert_equal expected, block_plus_hblk(string), "block_plus_hblk: #{desc}"
end
end
def test_speed_of_methods
n = 10000
# primer -- allows implementations like JRuby/Rubinius to warm up
primer_str = %q{abc&<>'"}
n.times { gsub_chain(primer_str) }
n.times { optimized_chain(primer_str) }
n.times { block_plus_case(primer_str) }
n.times { block_plus_hash(primer_str) }
n.times { block_plus_hblk(primer_str) }
results = []
STRINGS.each_pair do |desc, string|
Benchmark.bm(35) do |x|
a = x.report("gsub_chain: #{desc}") do
n.times { gsub_chain(string) }
end
b = x.report("optimized_chain: #{desc}") do
n.times { optimized_chain(string) }
end
c = x.report("block_plus_case: #{desc}") do
n.times { block_plus_case(string) }
end
d = x.report("block_plus_hash: #{desc}") do
n.times { block_plus_hash(string) }
end
e = x.report("block_plus_hblk: #{desc}") do
n.times { block_plus_hblk(string) }
end
results << [a, b, c, d, e]
end
end
totals = results.transpose.collect do |times|
times.inject(0) {|sum, time| sum + time.total }
end
puts
puts "Total Time (s)"
puts " gsub_chain: #{totals[0]}"
puts " optimized_chain: #{totals[1]}"
puts " block_plus_case: #{totals[2]}"
puts " block_plus_hash: #{totals[3]}"
puts " block_plus_hblk: #{totals[4]}"
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment