Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Scala implementation of Peter Norvig's spellchecker (http://norvig.com/spell-correct.html)
class NorvigSpellChecker(corpus: String, alphabet: Seq[Char] = 'a' to 'z', level: Int = 2) {
val words = s"[${alphabet.head}-${alphabet.last}]+".r.findAllIn(corpus.toLowerCase).toSeq
val count = words.groupBy(_.toSeq).mapValues(_.size) withDefaultValue 0
def edit(n: Int)(word: Seq[Char]): Set[Seq[Char]] = n match {
case 0 => Set(word)
case 1 =>
val splits = word.indices map word.splitAt
val deletes = splits collect {case (a, b0 +: b1) => a ++ b1}
val transposes = splits collect {case (a, b0 +: b1 +: b2) => a ++: b1 +: b0 +: b2}
val replaces = alphabet flatMap {c => splits collect {case (a, b0 +: b1) => a ++: c +: b1}}
val inserts = alphabet flatMap {c => splits map {case (a, b) => a ++: c +: b}}
(deletes ++ transposes ++ replaces ++ inserts).toSet
case _ => edit(n - 1)(word) flatMap edit(1)
}
def apply(word: String) = Stream.tabulate(level + 1)(i => edit(i)(word) maxBy count).find(count.contains).map(_.mkString)
}
// Demo:
object NorvigSpellChecker extends App {
val checker = new NorvigSpellChecker(io.Source.fromFile("big.txt").mkString) // curl http://norvig.com/big.txt > big.txt
Seq("speling", "korrecter", "corrected", "xyz") foreach {w => println(s"$w: ${checker(w)}")}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment