Skip to content

Instantly share code, notes, and snippets.

@smerrill
Created November 25, 2009 01:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save smerrill/242386 to your computer and use it in GitHub Desktop.
Save smerrill/242386 to your computer and use it in GitHub Desktop.
// The 26 lowercase ASCII letters, used as the pool of characters for replace/insert edits.
val alphabet = ('a' to 'z').mkString
/** Builds a word-frequency model from `text`.
  *
  * Every maximal run of the characters a-z counts as one word; the text is
  * not lower-cased here, so callers should do that first. The returned map
  * has a default value of 1, and each occurrence adds 1 on top of that, so a
  * word seen k times maps to k + 1 while an unseen word reads as 1. The
  * default does not affect `contains`: unseen words are still not keys.
  *
  * @param text the training corpus
  * @return word -> smoothed occurrence count
  */
def train(text: String): Map[String, Int] = {
  val emptyModel = Map[String, Int]() withDefaultValue 1
  "[a-z]+".r.findAllIn(text).foldLeft(emptyModel) { (counts, w) =>
    // Immutable maps have no in-place `update`; `updated` returns a new map
    // and keeps the default value attached.
    counts.updated(w, counts(w) + 1)
  }
}
// Frequency model trained on the lower-cased contents of big.txt (read from the
// current working directory).
val NWORDS = {
  val source = scala.io.Source.fromFile("big.txt")
  try {
    // getLines() yields lines without their terminators, so join them with "\n";
    // otherwise the last word of one line fuses with the first word of the next.
    train(source.getLines().mkString("\n").toLowerCase)
  } finally {
    // Release the file handle even if reading or training throws.
    source.close()
  }
}
/** Returns the subset of `words` that are keys of the NWORDS model.
  * Prints a trace line on every invocation.
  */
def known(words:Set[String]) = {
  println("Known invocation: %s".format(words))
  val recognised: Set[String] = words.filter(candidate => NWORDS.contains(candidate))
  recognised
}
/** Returns every string that is one simple edit away from `word`.
  *
  * The edits are: deleting one character, swapping two adjacent characters,
  * replacing one character with a letter of `alphabet`, and inserting a
  * letter of `alphabet` at any position, including after the last character.
  *
  * @param word the word to mutate
  * @return the distinct edited strings (may include `word` itself, e.g. when
  *         a character is replaced by the same letter)
  */
def edits1(word: String): Set[String] = {
  val n = word.length
  val deletes =
    for (i <- 0 until n) yield word.take(i) + word.drop(i + 1)
  val transposes =
    for (i <- 0 until n - 1) yield word.take(i) + word(i + 1) + word(i) + word.drop(i + 2)
  val replaces =
    for (i <- 0 until n; c <- alphabet) yield word.take(i) + c + word.drop(i + 1)
  // `0 to n` (inclusive) so a letter can also be appended at the end of the
  // word; with `until` an edit such as "th" -> "the" was never generated.
  val inserts =
    for (i <- 0 to n; c <- alphabet) yield word.take(i) + c + word.drop(i)
  (deletes ++ transposes ++ replaces ++ inserts).toSet
}
/** Returns the strings two edits away from `word` (edits1 applied to each
  * result of edits1) that are keys of the NWORDS model.
  */
def known_edits2(word:String) = {
  val firstPass = edits1(word)
  firstPass.flatMap { once =>
    edits1(once).filter(twice => NWORDS.contains(twice))
  }
}
/** Wrapper that adds a short-circuiting `or` to a set.
  *
  * A named class is used instead of an anonymous `new AnyRef { ... }` so that
  * `or` is an ordinary method call rather than a reflective call on a
  * structural type.
  */
class SetOrOps[A](one: Set[A]) {
  /** Returns the wrapped set if it is non-empty, otherwise `other`.
    * `other` is passed by name, so it is only evaluated when the wrapped set
    * is empty. Prints a trace line on every call.
    */
  def or(other: => Set[A]): Set[A] = {
    println("or called: %s" format one)
    if (one.isEmpty) other else one
  }
}

/** Implicit conversion that enables `a or b or c` fallback chains on sets. */
implicit def toOr[A](one: Set[A]): SetOrOps[A] = new SetOrOps(one)
/** Returns the most plausible spelling for `word`.
  *
  * Candidate sets are tried in order and the first non-empty one is used:
  * the word itself if known, known words one edit away, known words two
  * edits away, and finally the word unchanged. Among the chosen candidates
  * the one with the highest NWORDS count is returned; on a tie the candidate
  * visited later in the set's iteration order is kept.
  */
def correct(word: String) = {
  val candidates =
    known(Set(word))
      .or(known(edits1(word)))
      .or(known_edits2(word))
      .or(Set(word))
  candidates.foldLeft("") { (best, next) =>
    if (NWORDS(next) >= NWORDS(best)) next else best
  }
}
/* Outputs:
* scala> correct("the")
* Known invocation: Set(the)
* Known invocation: Set(the)
* or called: Set(the)
* Known invocation: Set(the)
* Known invocation: Set(the)
* or called: Set(the)
* or called: Set(the)
* Known invocation: Set(the)
* Known invocation: Set(the)
* or called: Set(the)
* Known invocation: Set(the)
* Known invocation: Set(the)
* or called: Set(the)
* or called: Set(the)
* or called: Set(the)
* res3: java.lang.String = the
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment