Skip to content

Instantly share code, notes, and snippets.

@gilbertw1
Created October 5, 2013 15:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gilbertw1/6842451 to your computer and use it in GitHub Desktop.
Save gilbertw1/6842451 to your computer and use it in GitHub Desktop.
In-memory word frequency: single-core and parallel versions.
// NOTE(review): both versions below bind `freqs`; presumably they are
// alternatives being compared and are not meant to share one scope.

/* Non-parallel Version -- Time: 5546ms */
// Counts every word of `words` in one pass on the calling thread.
val freqs = calculateFrequencies(words, 0, words.length)
/* Parallel Version -- Time 2891ms */
// Number of slices of `words` that are counted in parallel.
val pfactor = 4
// Integer division truncates; the remainder is handed to the last slice below.
val workSize = words.length / pfactor
val freqs = combineMaps (
  (0 until pfactor).par.map { unit =>
    val start = unit * workSize
    // The last slice runs to the end of the array so the trailing
    // `words.length % pfactor` words are counted instead of being dropped.
    val end = if (unit == pfactor - 1) words.length else start + workSize
    calculateFrequencies(words, start, end)
  }
)
/* Functions */
/** Counts how often each word occurs in the slice `words(start)` .. `words(end - 1)`.
  *
  * @param words the array of words to scan
  * @param start index of the first word to count (inclusive)
  * @param end   index one past the last word to count (exclusive)
  * @return a newly created map from each word seen in the slice to its number
  *         of occurrences; empty when `start >= end`
  */
def calculateFrequencies(words: Array[String], start: Int, end: Int): Map[String, Int] = {
  val counts = Map[String, Int]()
  // Plain index loop: this is the hot path of the computation.
  var i = start
  while (i < end) {
    val word = words(i)
    counts.update(word, counts.getOrElse(word, 0) + 1)
    i += 1
  }
  counts
}
/** Merges several word-count maps into one by summing the counts of equal keys.
  *
  * The merge is done in place: each step adds the entries of the right-hand
  * map into the left-hand map and returns that left-hand map, so the result is
  * one of the input maps and the inputs must not be reused afterwards.
  *
  * @param maps the partial frequency maps to merge
  * @return a map holding, for every key present in any input, the sum of its
  *         counts; an empty map when `maps` is empty
  */
def combineMaps(maps: ParIterable[Map[String,Int]]): Map[String,Int] = {
  // reduceOption instead of reduce: an empty collection yields an empty map
  // rather than an UnsupportedOperationException.
  maps.reduceOption { (acc, other) =>
    other.foreach { case (word, count) =>
      acc(word) = acc.getOrElse(word, 0) + count
    }
    acc
  }.getOrElse(Map[String, Int]())
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment