Created
January 29, 2009 14:52
-
-
Save cloudhead/54565 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
NWORDS = Hash.new( 1 ) | |
File.new('holmes.txt').read.downcase.scan(/[a-z]+/) { |k| NWORDS[k] += 1 } | |
LETTERS = ("a".."z").to_a.join | |
def edits1 word | |
n = word.length | |
alteration = insertion = [] | |
deletion = (0...n).collect { |i| word[0...i] + word[i + 1..-1] } # Deletion | |
transposition = (0...n-1).collect { |i| word[0...i] + word[i + 1, 1] + word[i, 1] + word[i + 2..-1] } # Transposition | |
n.times { |i| LETTERS.each_byte { |l| alteration << word[0...i] + l.chr + word[i + 1..-1] } } # Alterations | |
( n + 1 ).times { |i| LETTERS.each_byte { |l| insertion << word[0...i] + l.chr + word[i..-1] } } # Insertion | |
if ! ( result = deletion + transposition + alteration + insertion ).empty? then result end | |
end | |
def known_edits2 word | |
edits1( word ).collect { |e1| edits1( e1 ).select { |e2| NWORDS.has_key?(e2) } }.reject { |v| v.empty? } | |
end | |
def known words | |
if ! ( result = words.select { |w| NWORDS.has_key?(w) } ).empty? then result end | |
end | |
def correct word | |
(known([word]) or known(edits1(word)) or known_edits2(word) or [word]).max { |a,b| NWORDS[a] <=> NWORDS[b] }.first | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment