Skip to content

Instantly share code, notes, and snippets.

@javouhey
Forked from paulmillr/mapreduce.scala
Created April 29, 2012 07:05
Show Gist options
  • Save javouhey/2537998 to your computer and use it in GitHub Desktop.
Save javouhey/2537998 to your computer and use it in GitHub Desktop.
Why functional programming matters (aka MapReduce for humans)
import com.cloudera.crunch._
import com.cloudera.scrunch._
/**
 * Word-count job expressed as a Scrunch `Pipeline`.
 */
class ScrunchWordCount {

  /**
   * Counts the words found in a text input and writes the counts as text.
   *
   * Every record read from `inputFile` (presumably one line of text) is
   * lower-cased and split on runs of non-word characters (`\W+`); empty
   * tokens are dropped before counting. Once the counts have been handed to
   * the text target, they are additionally re-keyed by the first character of
   * each word, grouped by that key, summed and materialized. The materialized
   * value is not kept or returned by this method.
   *
   * @param inputFile  path passed to `from.textFile` as the pipeline source
   * @param outputFile path passed to `to.textFile` as the pipeline target
   * @return the result of `pipeline.done`
   */
  def wordCount(inputFile: String, outputFile: String) = {
    val pipeline = new Pipeline[ScrunchWordCount]

    // Tokenize: lower-case each record, split on non-word runs, drop blanks.
    val tokens = pipeline
      .read(from.textFile(inputFile))
      .flatMap(record => record.toLowerCase.split("\\W+"))
      .filter(token => !token.isEmpty())

    // Word counts
    val written = tokens.count.write(to.textFile(outputFile))

    // Re-key each count by the word's first character and total per key.
    val keyedByInitial = written.map((word, count) => (word.slice(0, 1), count))
    keyedByInitial.groupByKey.combine(values => values.sum).materialize

    pipeline.done
  }
}
/**
 * Command-line entry point for [[ScrunchWordCount]].
 */
object ScrunchWordCount {

  /**
   * Runs the word count using the first two command-line arguments.
   *
   * @param args `args(0)` is the input file, `args(1)` is the output file;
   *             any further arguments are ignored
   */
  def main(args: Array[String]): Unit = {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    if (args.length < 2) {
      Console.err.println("Usage: ScrunchWordCount <inputFile> <outputFile>")
      sys.exit(1)
    }

    // The explicit `()` matters: `new ScrunchWordCount.wordCount(...)` would be
    // parsed as instantiating a type named `wordCount` inside this object.
    new ScrunchWordCount().wordCount(args(0), args(1))
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment