Skip to content

Instantly share code, notes, and snippets.

@nounderline
Created May 29, 2013 18:55
Show Gist options
  • Save nounderline/5672815 to your computer and use it in GitHub Desktop.
Save nounderline/5672815 to your computer and use it in GitHub Desktop.
[Ruby] Spelling Corrector
# Ruby Spelling Corrector ported from Python script by Peter Norvig (http://norvig.com/spell-correct.html)
#
# Usage:
# s = Spelling.new('big.txt').read # Download one from: http://norvig.com/big.txt
# s.correct 'gaem of thrnoes'
# => "gaem of thrnoes"
class Spelling
LETTERS = ('a'..'z').to_a.join
def initialize(sample)
@model = train(sample)
end
def correct(text)
result = []
split_words(text).each {|w| result.push candidate(w) }
result.join(' ')
end
private
def split_words(text)
text.downcase.scan(/[a-z]+/)
end
def train(text)
model = Hash.new(1)
words = split_words(text)
words.each {|w| model[w] += 1 }
return model
end
def edits1(word)
n = word.length
deletion = (0...n).collect {|i| word[0...i] + word[i + 1..-1] }
transposition = (0...n - 1).collect {|i| word[0...i] + word[i + 1, 1] + word[i, 1] + word[i + 2..- 1] }
alteration = []
n.times {|i| LETTERS.each_byte {|l| alteration << word[0...i] + l.chr + word[i + 1..-1] } }
insertion = []
(n + 1).times {|i| LETTERS.each_byte {|l| insertion << word[0...i] + l.chr + word[i..- 1] } }
result = deletion + transposition + alteration + insertion
result.any? && result
end
def known_edits2(word)
result = []
edits1(word).each {|e1| edits1(e1).each {|e2| result << e2 if @model.has_key?(e2) }}
result.empty? ? nil : result
end
def known(words)
result = words & @model.keys
result.empty? ? nil : result
end
def candidate(word)
candidates = known([word]) || known(edits1(word)) || known_edits2(word) || [word]
candidates.max {|a,b| @model[a]; @model[b] }
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment