Skip to content

Instantly share code, notes, and snippets.

@zzmp
Last active August 29, 2015 14:11
Show Gist options
  • Save zzmp/4bf43e7ea507045e5eab to your computer and use it in GitHub Desktop.
Save zzmp/4bf43e7ea507045e5eab to your computer and use it in GitHub Desktop.
Spellchecker
// This code is meant to accompany this [blog entry](http://www.garabagne.io/2014/12/15/spellchecker/).
// It was inspired primarily by [this article](http://www.norvig.com/spell-correct.html).
Spellchecker = function() {
/** A spellchecker.
* _Will suggest more common spellings of questionable words._
* @class Spellchecker
* @param {string} [text] - space-separated text, as from a book, to use for initial training
* @example
* // someText = 'hello world'
* // moreText = 'goodbye world'
* var spellchecker = new Spellchecker(someText)
* spellchecker.train(moreText)
* var sentence = 'hillo sweet wrld'.split(' ')
* .map(function(word) {
* var candidates = spellchecker.correct(word)
* if (candidates.length) return candidates[0]
* else return word
* })
* .join(' ')
* console.log(sentence) // 'hello sweet world'
*/
function Spellchecker(text) {
this.dict = {}
if (text) this.train(text)
}
/** Train the instance with a block of space-separated text.
* _Training is meant to accomodate for word frequency, so a dictionary of words is not appropriate._
* @param {string} dictionary - space-separated text, as from a book
*/
Spellchecker.prototype.train = train
/** Return an array of possible corrections to a word.
* _If the word appears to be correct, an empty array will be returned._
* @param {string} word - a word to correct
* @returns {array} candidates - the most likely corrections, lower-cased (will not include `word`)
*/
Spellchecker.prototype.correct = correct
var letters = 'abcdefghijklmnopqrstuvwxyz'.split('')
function train(dictionary) {
var dict = dictionary.toLowerCase(),
regexp = /[a-z]+/ig,
words
while ((words = regexp.exec(dict)))
this.dict[words[0]] = this.dict.hasOwnProperty(words[0]) ?
this.dict[words[0]] + 1 : 1
}
function correct(word) {
if (this.dict.hasOwnProperty(word)) return []
var dict = this.dict,
score = 0
return edits(word)
.filter(function(candidate) {
if (dict.hasOwnProperty(candidate)) {
score = Math.max(score, dict[candidate])
return true
}
})
.filter(function(candidate) {
return dict[candidate] === score
})
}
/** Exploratory function to edit a word.
* _Edits can include:_
* - _deletion_
* - _transposition_
* - _insertion_
* - _alteration_
* @param {string} word - the word to explore.
* @returns {array} words - a new problem-space of word edits.
*/
function edits(word) {
var words = []
// Assume UTF-8 (i.e. can be simply iterated)
for (var i = 0; i < word.length; i++) {
var pre = word.slice(0, i),
post = word.slice(i),
rest = word.slice(i + 1)
// deletion - remove the current letter
words.push(pre + rest)
// transposition - shift the current letter
words.push(pre, rest.slice(0, 1), word[i], rest.slice(1))
letters.forEach(function(letter) {
// insertion - insert a 'missing' letter
words.push(pre + letter + post)
// alteration - fix a 'mistyped' letter
words.push(pre + letter + rest)
})
}
return words
}
return Spellchecker
}()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment