Created
August 13, 2014 21:32
-
-
Save mnd999/a129e78c9cf9be278d7f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package soundex | |
import scala.io.Source | |
import java.io.File | |
import scala.util.Random | |
/**
 * Rewrites the text of the classpath resource `/queen.txt`: every word of four
 * or more characters is swapped for a random entry of `/usr/share/dict/words`
 * that has the same Soundex code, and the resulting lines are printed.
 *
 * The input data is loaded lazily, so the pure helpers (`soundex`, `encode`,
 * `removeDup`, `stripChars`) can be used without touching the file system.
 */
object SoundexHack {

  /** Value returned by `encode` for characters that carry no Soundex digit. */
  private final val NoCode = 99

  /** Every line of the system word list. */
  lazy val dictWords: List[String] =
    readLines(Source.fromFile(new File("/usr/share/dict/words")))

  /** Dictionary words grouped by their Soundex code. */
  lazy val dict: Map[String, List[String]] = dictWords.groupBy(soundex)

  /** Lines of the text to be rewritten, read from the classpath. */
  lazy val queen: List[String] = {
    // getResourceAsStream signals a missing resource with null; fail with a clear message instead.
    val stream = Option(SoundexHack.getClass.getResourceAsStream("/queen.txt"))
      .getOrElse(throw new java.io.FileNotFoundException("classpath resource /queen.txt not found"))
    readLines(Source.fromInputStream(stream))
  }

  /** Entry point: prints the rewritten version of every input line. */
  def main(args: Array[String]): Unit =
    queen.foreach(line => println(rewriteLine(line)))

  /** Reads all lines from `source` and always closes it afterwards. */
  private def readLines(source: Source): List[String] =
    try source.getLines().toList
    finally source.close()

  /** Normalises one line and replaces each word of four or more characters. */
  private def rewriteLine(line: String): String =
    if (line.isEmpty) ""
    else
      stripChars(line)
        .split(' ')
        .map(word => if (word.length < 4) word else getDictWord(word))
        .mkString(" ")

  /**
   * Picks a random dictionary word sharing the Soundex code of `word`.
   *
   * @param word the word to replace
   * @return a dictionary word with the same code, or `word` itself when the
   *         dictionary has no entry for that code
   */
  def getDictWord(word: String): String = {
    val possibles = dict.getOrElse(soundex(word), List(word))
    // possibles is never empty: groupBy produces no empty groups and the fallback holds `word`.
    possibles(Random.nextInt(possibles.size))
  }

  /**
   * Computes the Soundex code of `word`.
   *
   * The result is the first character of the raw `word` (case preserved)
   * followed by exactly three digits; an empty `word` yields `"000"`.
   *
   * @param word the word to encode
   * @return the one-character prefix plus three Soundex digits
   */
  def soundex(word: String): String = {
    val digits = stripChars(word).toList match {
      case Nil => Nil
      case first :: rest =>
        // 'h' and 'w' do not separate equal codes, so they are removed before
        // collapsing runs -- but only after the leading letter. Removing a
        // leading 'h'/'w' too would make `.tail` discard the code of the
        // following consonant instead of the leading letter's own code.
        val encoded = (first :: rest.filterNot(c => c == 'h' || c == 'w')).map(c => encode(c))
        // Vowels (NoCode) must still be present while collapsing so that they
        // separate equal codes; they are dropped only afterwards.
        removeDup(encoded).tail.filter(_ != NoCode)
    }
    // Pad with zeros so that the code always carries three digits.
    word.take(1) + (digits ::: List(0, 0, 0)).take(3).mkString
  }

  /**
   * Collapses every run of consecutive equal elements to a single element.
   *
   * @param chars the list to collapse; may be empty
   * @return `chars` without adjacent duplicates, in the original order
   */
  def removeDup[A](chars: List[A]): List[A] =
    chars.foldLeft(List.empty[A]) {
      case (acc @ (last :: _), c) if last == c => acc
      case (acc, c) => c :: acc
    }.reverse

  /**
   * Maps a lower-case letter to its Soundex digit.
   *
   * @param f the character to encode
   * @return a digit from 1 to 6, or 99 for any character without a digit
   *         (vowels, 'y', 'h', 'w' and everything that is not a-z)
   */
  def encode(f: Character): Int = f.charValue match {
    case 'b' | 'f' | 'p' | 'v' => 1
    case 'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' => 2
    case 'd' | 't' => 3
    case 'l' => 4
    case 'm' | 'n' => 5
    case 'r' => 6
    case _ => NoCode
  }

  /**
   * Lower-cases `s` and removes everything except a-z, spaces and newlines.
   *
   * @param s the text to normalise
   * @return the normalised text
   */
  def stripChars(s: String): String =
    // Locale.ROOT keeps the mapping stable regardless of the JVM default locale.
    s.toLowerCase(java.util.Locale.ROOT).replaceAll("[^a-z \n]", "")
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment