Skip to content

Instantly share code, notes, and snippets.

@benshimmin
Last active August 29, 2015 14:11
Show Gist options
  • Save benshimmin/2ee78c932797faadfc89 to your computer and use it in GitHub Desktop.
Save benshimmin/2ee78c932797faadfc89 to your computer and use it in GitHub Desktop.
Peter Norvig's famous spelling corrector, in CoffeeScript (with Underscore)
# A CoffeeScript version of http://norvig.com/spell-correct.html
[fs, _] = [require("fs"), require("underscore")]
words = (text) -> text.toLowerCase().match /[a-z]+/g
train = (features) ->
model = {}
_.each features, (f) -> if model[f] then model[f] += 1 else model[f] = 1
model
NWORDS = train words fs.readFileSync("big.txt").toString()
alphabet = "abcdefghijklmnopqrstuvwxyz"
edits1 = (word) ->
splits = _.map _.range(0, word.length + 1), (i) ->
[(if i is 0 then "" else word[...i]), word[i...]]
deletes = _.map splits, ([a,b]) -> a + b[1...] if b
transposes = _.map splits, ([a,b]) -> a + b[1] + b[0] + b[2...] if b.length > 1
replaces = _.map splits, ([a,b]) -> a + c + b[1...] for c in alphabet if b
inserts = _.map splits, ([a,b]) -> a + c + b for c in alphabet
_.compact _.flatten [deletes, transposes, replaces, inserts]
knownEdits2 = (word) ->
_.chain(edits1(word)).map((e) -> _.select edits1(e), (e) -> _.has NWORDS, e)
.flatten().compact().uniq().value()
known = (words) -> _.uniq _.compact _.select words, (w) -> _.has NWORDS, w
correct = (word) ->
candidates = _.find [known([word]), known(edits1(word)), knownEdits2(word), [word]], (a) -> a.length
(_.max _.map(candidates, (c) -> [c, NWORDS[c]]), (c) -> c[1])[0]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment