Skip to content

Instantly share code, notes, and snippets.

@pkukielka
Last active December 31, 2015 18:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pkukielka/8030702 to your computer and use it in GitHub Desktop.
Save pkukielka/8030702 to your computer and use it in GitHub Desktop.
Make implementation parallel
class SpellChecker {
val alphabet = ('a' to 'z') ++ " "
def trim(word: String) = word.toLowerCase.filter(alphabet.contains(_))
val file = io.Source.fromFile("big.txt").mkString
val wordsCount = file.split(' ').map(trim).groupBy(identity).mapValues(_.length).par
def distortions(w: String) = {
def disort(t: Int, d: Int, insert: String) = w.take(t) + insert + w.drop(d)
val transposes = for { i <- (0 to w.length - 2) } yield disort(i, i + 2, "" + w(i + 1) + w(i))
val deletions = for { i <- (0 to w.length - 1) } yield disort(i, i + 1, "")
val replaces = for { i <- (0 to w.length - 1); l <- alphabet } yield disort(i , i + 1, l.toString)
val insertions = for { i <- (0 to w.length ); l <- alphabet } yield disort(i , i , l.toString)
(deletions ++ replaces ++ insertions ++ transposes).par
}
def matchingDistorsions(distortions: scala.collection.parallel.immutable.ParSeq[String]) = {
val matching = distortions.filter(wordsCount.contains(_))
if (matching.isEmpty) None else Some(matching.maxBy(wordsCount(_)))
}
def correct(word: String): Option[String] = {
(if (wordsCount.contains(trim(word))) Some(word) else None) orElse
matchingDistorsions(distortions(trim(word))) orElse
matchingDistorsions(distortions(trim(word)).flatMap(distortions(_))) orElse
None
}
}
@pkukielka
Copy link
Author

Performance on my i5 machine for 8 letter word:
~20 corrections / sec in case of double distortion
~3000 corrections / sec in case of single distortion

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment