Created
February 12, 2012 13:34
-
-
Save fxn/1808549 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'active_support/inflector' | |
require 'benchmark' | |
# QUICK HACK | |
# Ordered collection of (pattern, replacement) substitution rules that are
# fused into one big alternation regexp, so that a single match against a word
# tells which rule should be applied to it.
#
# Every pattern is wrapped in its own capture group before being added to the
# combined regexp. After a match, the first non-nil capture is the wrapper
# group of the alternative that matched, and @group2rule maps that capture
# index back to the rule.
#
# NOTE(review): patterns are embedded inside a larger regexp, so a numbered
# backreference inside a pattern would be renumbered relative to the combined
# regexp — confirm no rule relies on one.
class RuleSet
  def initialize
    @rules = []       # [pattern, replacement] pairs, in insertion order
    @regexp = nil     # combined alternation; nil until the first rule is added
    @group2rule = {}  # capture index of a rule's wrapper group => index in @rules
  end

  # Appends a rule.
  #
  # pattern     - a Regexp, or a String that is matched literally.
  # replacement - the replacement handed to String#sub! when the rule fires.
  #
  # Returns the internal rules array.
  def append_rule(pattern, replacement)
    # The wrapper group of the new rule is numbered right after every group
    # already present in the combined regexp (captures are 0-indexed).
    ngroups = @regexp ? count_groups(@regexp) : 0
    @group2rule[ngroups] = @rules.size

    new_regexp = build_regexp(pattern)
    @regexp = @regexp ? Regexp.union(@regexp, new_regexp) : new_regexp
    @rules << [pattern, replacement]
  end

  # Wraps +pattern+ in a capture group. String patterns are quoted first so
  # they match literally.
  def build_regexp(pattern)
    pattern = Regexp.quote(pattern) if pattern.is_a?(String)
    /(#{pattern})/
  end

  # Returns a copy of +word+ with the matching rule applied, or an unchanged
  # copy when no rule matches or no rule has been added yet. The argument
  # itself is never modified.
  def apply(word)
    word = word.dup
    # An empty rule set has no combined regexp to match against.
    return word unless @regexp

    md = @regexp.match(word)
    return word unless md

    # The first non-nil capture identifies the alternative that matched.
    index = md.captures.find_index { |capture| capture }
    pattern, replacement = @rules[@group2rule[index]]
    # Substitute with the original pattern so that group references in the
    # replacement are resolved against the rule's own groups.
    word.sub!(pattern, replacement)
    word
  end

  # Counts the capture groups in +pattern+: the union with an empty regexp
  # always matches the empty string, and the resulting MatchData exposes one
  # capture slot per group.
  def count_groups(pattern)
    Regexp.union(pattern, //).match('').captures.length
  end
end
# Load every plural rule known to ActiveSupport into a RuleSet, in the order
# the plurals list yields them.
inflections = ActiveSupport::Inflector::Inflections.instance
rs = RuleSet.new
inflections.plurals.each { |pattern, replacement| rs.append_rule(pattern, replacement) }

words = File.readlines('/usr/share/dict/words').map(&:chomp)

# Sanity check: for every word not listed as uncountable, the RuleSet must
# produce the same plural as ActiveSupport. A two-digit progress counter is
# rewritten in place every 1000 words.
STDOUT.sync = true
print "sanity checking... %00"
words.each_with_index do |word, i|
  if i % 1000 == 0
    percent = ((100.0 * i) / words.size).to_i
    print "\b\b%02d" % percent
  end
  next if inflections.uncountables.include?(word)

  as_plural = ActiveSupport::Inflector.pluralize(word)
  rs_plural = rs.apply(word)
  next if as_plural == rs_plural

  raise "error: #{as_plural} vs #{rs_plural} for #{word}"
end
puts "\b\b100\n"

# Time both implementations over the whole word list.
Benchmark.bm do |x|
  x.report('AS') do
    words.each { |word| ActiveSupport::Inflector.pluralize(word) }
  end
  x.report('RS') do
    words.each { |word| rs.apply(word) }
  end
end
__END__
sanity checking... %100
user system total real
AS 75.010000 0.930000 75.940000 ( 78.532994)
RS 11.820000 0.160000 11.980000 ( 12.080551)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment