Skip to content

Instantly share code, notes, and snippets.

@volkanunsal
Forked from benshimmin/spelling_corrector.coffee
Last active August 29, 2015 14:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save volkanunsal/cbce682b0f0d59b7c7de to your computer and use it in GitHub Desktop.
Save volkanunsal/cbce682b0f0d59b7c7de to your computer and use it in GitHub Desktop.
# A CoffeeScript version of http://norvig.com/spell-correct.html
[fs, _] = [require("fs"), require("underscore")]
words = (text) -> text.toLowerCase().match /[a-z]+/g
train = (features) ->
model = {}
_.each features, (f) -> if model[f] then model[f] += 1 else model[f] = 1
model
NWORDS = train words fs.readFileSync("big.txt").toString()
alphabet = "abcdefghijklmnopqrstuvwxyz"
edits1 = (word) ->
splits = _.map _.range(0, word.length + 1), (i) ->
[(if i is 0 then "" else word[...i]), word[i...]]
deletes = _.map splits, ([a,b]) -> a + b[1...] if b
transposes = _.map splits, ([a,b]) -> a + b[1] + b[0] + b[2...] if b.length > 1
replaces = _.map splits, ([a,b]) -> a + c + b[1...] for c in alphabet if b
inserts = _.map splits, ([a,b]) -> a + c + b for c in alphabet
_.compact _.flatten [deletes, transposes, replaces, inserts]
knownEdits2 = (word) ->
_.chain(edits1(word)).map((e) -> _.select edits1(e), (e) -> _.has NWORDS, e)
.flatten().compact().uniq().value()
known = (words) -> _.chain(words).select((w)->_.has(NWORDS,w)).compact().uniq().value()
correct = (word) ->
candidates = _.find [known([word]), known(edits1(word)), knownEdits2(word), [word]], (a) -> a.length
(_.max _.map(candidates, (c) -> [c, NWORDS[c]]), (c) -> c[1])[0]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment