Created
June 3, 2012 22:05
-
-
Save Mononofu/2865184 to your computer and use it in GitHub Desktop.
Text unscrambler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** Text unscrambler.
  *
  * Loads the system word list, indexes every word by its sorted characters,
  * reports the anagram groups that stay ambiguous even when the first and
  * last letters are fixed, and finally unscrambles a sample sentence.
  */
object Scramble extends App {
  // Keep a handle on the Source so the file descriptor can be released once
  // the dictionary has been materialised.
  private val dictionary = scala.io.Source.fromFile("/usr/share/dict/words")

  // Line iterator over the dictionary; it is exhausted once `uniques` is built.
  val words = dictionary.getLines()

  // Dictionary words without apostrophes (drops possessives such as "cat's").
  // Blank lines are skipped as well so later `.head` / `.last` calls are safe.
  val uniques: List[String] =
    try words.filter(w => w.nonEmpty && !w.contains("'")).toList
    finally dictionary.close()

  // group words by what you get when you sort their individual characters
  val scrambled_lookup: Map[String, List[String]] = uniques.groupBy(_.sorted)

  // words whose sorted versions collide, largest groups first
  val non_uniques: List[List[String]] =
    scrambled_lookup.values.toList.filter(_.lengthCompare(1) > 0).sortWith(_.length > _.length)

  // words which have non-unique sorted representation even if you fix first and last letter
  val not_recognizable: List[List[String]] =
    non_uniques.map(ambiguousWithin).filter(_.nonEmpty)

  // all the non-unique words
  println(not_recognizable.sortWith(_.length > _.length))

  // number of words with same sorted representation, grouped by size of group
  println(not_recognizable.groupBy(_.length).toList.map {
    case (length, groups) => (length, groups.length)
  })

  /** Returns the members of one anagram group that share both first and last
    * letter with at least one other member of the same group.
    *
    * Words ending in 's' are ignored; remove that filter to include plurals.
    * The result keeps first-occurrence order and contains no duplicates.
    */
  private def ambiguousWithin(anagrams: List[String]): List[String] =
    anagrams
      .filter(_.last != 's')
      .combinations(2)
      .toList
      .filter {
        case List(first, second) =>
          first.head == second.head && first.last == second.last
        case _ => false // combinations(2) only yields pairs; keeps the match total
      }
      .flatten
      .distinct

  /** Unscrambles `text` against an explicit lookup table.
    *
    * Periods and commas are removed, the text is split on single spaces and
    * each token is lower-cased. A token is replaced by the first candidate in
    * `lookup` (keyed by sorted characters) whose first and last letters match
    * the token's. Tokens with no matching candidate are returned lower-cased
    * and otherwise unchanged instead of raising an exception.
    *
    * @param lookup map from sorted-character key to the words sharing that key
    * @param text   whitespace-separated scrambled text
    * @return the unscrambled, lower-cased text joined by single spaces
    */
  def unscrambleWith(lookup: Map[String, List[String]], text: String): String =
    text.replaceAll("[\\.,]", "").split(" ").map { token =>
      val w = token.toLowerCase
      if (w.isEmpty) w // produced by empty input or repeated spaces
      else
        lookup
          .getOrElse(w.sorted, Nil)
          .find(p => p.nonEmpty && p.head == w.head && p.last == w.last)
          .getOrElse(w)
    }.mkString(" ")

  /** Unscrambles `text` using the dictionary loaded from the system word list.
    *
    * @param text scrambled text in which every word keeps its first and last letter
    * @return the unscrambled, lower-cased text
    */
  def unscramble(text: String): String = unscrambleWith(scrambled_lookup, text)

  val txt = "Olny srmat poelpe can raed tihs. I cluod not blveiee taht I cluod aulaclty uesdnatnrd waht I was rdanieg. The pheonmneal pweor of the hmuan mnid"
  println(unscramble(txt))
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
/** Text unscrambler.
  *
  * Loads the system word list, indexes every word by its sorted characters,
  * reports the anagram groups that stay ambiguous even when the first and
  * last letters are fixed, and finally unscrambles a sample sentence.
  */
object Scramble extends App {
  // Keep a handle on the Source so the file descriptor can be released once
  // the dictionary has been materialised.
  private val dictionary = scala.io.Source.fromFile("/usr/share/dict/words")

  // Line iterator over the dictionary; it is exhausted once `uniques` is built.
  val words = dictionary.getLines()

  // Dictionary words without apostrophes (drops possessives such as "cat's").
  // Blank lines are skipped as well so later `.head` / `.last` calls are safe.
  val uniques: List[String] =
    try words.filter(w => w.nonEmpty && !w.contains("'")).toList
    finally dictionary.close()

  // group words by what you get when you sort their individual characters
  val scrambled_lookup: Map[String, List[String]] = uniques.groupBy(_.sorted)

  // words whose sorted versions collide, largest groups first
  val non_uniques: List[List[String]] =
    scrambled_lookup.values.toList.filter(_.lengthCompare(1) > 0).sortWith(_.length > _.length)

  // words which have non-unique sorted representation even if you fix first and last letter
  val not_recognizable: List[List[String]] =
    non_uniques.map(ambiguousWithin).filter(_.nonEmpty)

  // all the non-unique words
  println(not_recognizable.sortWith(_.length > _.length))

  // number of words with same sorted representation, grouped by size of group
  println(not_recognizable.groupBy(_.length).toList.map {
    case (length, groups) => (length, groups.length)
  })

  /** Returns the members of one anagram group that share both first and last
    * letter with at least one other member of the same group.
    *
    * Words ending in 's' are ignored; remove that filter to include plurals.
    * The result keeps first-occurrence order and contains no duplicates.
    */
  private def ambiguousWithin(anagrams: List[String]): List[String] =
    anagrams
      .filter(_.last != 's')
      .combinations(2)
      .toList
      .filter {
        case List(first, second) =>
          first.head == second.head && first.last == second.last
        case _ => false // combinations(2) only yields pairs; keeps the match total
      }
      .flatten
      .distinct

  /** Unscrambles `text` against an explicit lookup table.
    *
    * Periods and commas are removed, the text is split on single spaces and
    * each token is lower-cased. A token is replaced by the first candidate in
    * `lookup` (keyed by sorted characters) whose first and last letters match
    * the token's. Tokens with no matching candidate are returned lower-cased
    * and otherwise unchanged instead of raising an exception.
    *
    * @param lookup map from sorted-character key to the words sharing that key
    * @param text   whitespace-separated scrambled text
    * @return the unscrambled, lower-cased text joined by single spaces
    */
  def unscrambleWith(lookup: Map[String, List[String]], text: String): String =
    text.replaceAll("[\\.,]", "").split(" ").map { token =>
      val w = token.toLowerCase
      if (w.isEmpty) w // produced by empty input or repeated spaces
      else
        lookup
          .getOrElse(w.sorted, Nil)
          .find(p => p.nonEmpty && p.head == w.head && p.last == w.last)
          .getOrElse(w)
    }.mkString(" ")

  /** Unscrambles `text` using the dictionary loaded from the system word list.
    *
    * @param text scrambled text in which every word keeps its first and last letter
    * @return the unscrambled, lower-cased text
    */
  def unscramble(text: String): String = unscrambleWith(scrambled_lookup, text)

  val txt = "Olny srmat poelpe can raed tihs. I cluod not blveiee taht I cluod aulaclty uesdnatnrd waht I was rdanieg. The pheonmneal pweor of the hmuan mnid"
  println(unscramble(txt))
}