Skip to content

Instantly share code, notes, and snippets.

@afeinberg
Created December 29, 2009 01:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save afeinberg/265089 to your computer and use it in GitHub Desktop.
Save afeinberg/265089 to your computer and use it in GitHub Desktop.
import io.Source
object SpellChecker {
val NWORDS = train(words(Source.fromFile("/tmp/big.txt", "ISO-8859-1").mkString))
val alphabet = ('a' to 'z').toList
def words(text: String):Iterator[String] = """[a-z]+""".r.findAllIn(text.toLowerCase)
def train(features: Iterator[String]):Map[String,Int] = {
val empty = Map[String,Int]().withDefaultValue(1)
features.foldLeft(empty) { case (model, f) => model(f) += 1 }
}
def edits1(word: String):Set[String] = {
val s = (0 to word.length).toList map { i => (word.take(i), word.drop(i)) }
val deletes = for ((a,b) <- s if b.length > 0) yield a + b.drop(1)
val transposes = for ((a,b) <- s if b.length > 1) yield a + b(1) + b(0) + b.drop(2)
val replaces = for ((a,b) <- s if b.length > 0; c <- alphabet) yield a + c + b.drop(1)
val inserts = for ((a,b) <- s; c <- alphabet) yield a + c + b
Set.empty ++ deletes ++ transposes ++ replaces ++ inserts
}
def known(words: Set[String]):Set[String] = words.filter(NWORDS.contains)
def correct(word: String):String = {
lazy val wordKnown = known(Set(word))
lazy val wordEdits1 = known(edits1(word))
lazy val wordEdits2 = known(edits1(word).flatMap(edits1))
val candidates = (if (!wordKnown.isEmpty) wordKnown else
if (!wordEdits1.isEmpty) wordEdits1 else
if (!wordEdits2.isEmpty) wordEdits2 else Set(word))
candidates.reduceLeft { (a, b) => (if (NWORDS(a) > NWORDS(b)) a else b) }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment