Skip to content

Instantly share code, notes, and snippets.

@samklr
Last active December 17, 2015 19:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save samklr/5663427 to your computer and use it in GitHub Desktop.
Save samklr/5663427 to your computer and use it in GitHub Desktop.
/** Word-count job written against the `PipelineApp` base.
  *
  * Command-line arguments, as used below:
  *   - `args(0)`, `args(1)`: the two text inputs to count words in
  *   - `args(2)`: the text output that receives the combined result
  */
object WordCount extends PipelineApp {

  /** Builds the per-word tally for one text input.
    *
    * Each element read from `file` is split on runs of non-word characters
    * (`\W+`); empty tokens (e.g. produced by leading punctuation) are dropped
    * before `count` is applied.
    *
    * @param file location of the text input, passed to `from.textFile`
    * @return the result of `count` on the token collection
    *         (presumably a word -> occurrences table; confirm against the pipeline API)
    */
  def countWords(file: String) =
    read(from.textFile(file))
      .flatMap(_.split("\\W+").filter(!_.isEmpty()))
      .count

  /** Original name of [[countWords]]; kept as an alias so existing callers keep compiling. */
  def ScrunchWordCount(file: String) = countWords(file)

  // Tally both inputs independently, then combine them with `join`.
  val counts = join(countWords(args(0)), countWords(args(1)))
  write(counts, to.textFile(args(2)))
}
/** Word-count job expressed with `DList` operations.
  *
  * After `withHadoopArgs` has processed the raw command line, the remaining
  * arguments are used as:
  *   - `args(0)`: text input
  *   - `args(1)`: text output
  */
object ScoobiWordCount {

  /** Entry point.
    *
    * The result type is declared as `Unit` explicitly so the method is a valid
    * JVM `main` regardless of what `withHadoopArgs` returns.
    *
    * @param allArgs raw command-line arguments, handed to `withHadoopArgs`
    */
  def main(allArgs: Array[String]): Unit = withHadoopArgs(allArgs) { args =>
    val lines: DList[String] = fromTextFile(args(0))

    val counts: DList[(String, Int)] = lines
      // Splitting on a single space yields "" for leading or consecutive
      // spaces; drop those so the empty string is not tallied as a word.
      .flatMap(_.split(" ").filter(_.nonEmpty))
      .map(word => (word, 1))
      .groupByKey
      .combine(_ + _)

    persist(toTextFile(counts, args(1)))
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment