Skip to content

Instantly share code, notes, and snippets.

@thedmitriyk
Created July 15, 2015 11:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thedmitriyk/a70519d43c12108ad0d5 to your computer and use it in GitHub Desktop.
Save thedmitriyk/a70519d43c12108ad0d5 to your computer and use it in GitHub Desktop.
A naïve Scala implementation of Peter Norvig's Spelling Corrector from http://norvig.com/spell-correct.html Optimized for neither readability nor brevity nor performance.
object SpellCheck {
val alpha = "abcdefghijklmnopqrstuvwxyz"
val model = scala.io.Source.fromFile("big.txt")
.getLines()
.flatMap(_.toLowerCase.split("\\W+"))
.foldLeft(Map.empty[String, Int].withDefaultValue(1)) { (acc, word) =>
acc + (word -> (acc.getOrElse(word, 0) + 1))
}
def known(words: Set[String]) = words.intersect(model.keySet)
def edits1(word: String) = {
val lcWord = word.toLowerCase
val splits = lcWord.inits.map(i => (i, lcWord.drop(i.length))).toSet
splits.collect { // deletes
case (a, b) if b.length > 0 => a + b.drop(1)
} ++ splits.collect { // transposes
case (a, b) if b.length > 1 => a + b.take(2).reverse + b.drop(2)
} ++ splits.flatMap { // inserts
case (a, b) => alpha.map(a + _ + b)
} ++ splits.collect { // replaces
case (a, b) if b.length > 0 => alpha.map(a + _ + b.drop(1))
}.flatten
}
def known_edits2(word: String) = known(edits1(word).flatMap(edits1))
def apply(word: String) = {
known(Set(word)).headOption
.orElse(known(edits1(word)).toSeq.sortBy(model).lastOption)
.orElse(known_edits2(word).toSeq.sortBy(model).lastOption)
.getOrElse(word)
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment