Skip to content

Instantly share code, notes, and snippets.

@samklr
Last active December 17, 2015 19:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save samklr/5662343 to your computer and use it in GitHub Desktop.
Save samklr/5662343 to your computer and use it in GitHub Desktop.
/**
 * Word-count job written against the fields-based API.
 *
 * Reads text lines from the path given by the `input` argument, expands the
 * `'line` field into a `'word` field via `tokenize`, groups on `'word`, takes
 * the size of each group, and writes the result as TSV to the path given by
 * the `output` argument.
 *
 * NOTE(review): `tokenize` is not defined in this snippet; it is assumed to be
 * supplied elsewhere and to turn one line into a collection of words — confirm.
 */
class WordCountJob(args : Args) extends Job(args) {
  TextLine(args("input"))
    // One output tuple per token produced from each input line.
    .flatMap('line -> 'word) { text : String =>
      tokenize(text)
    }
    // Aggregate per distinct word: the group size is the number of occurrences.
    .groupBy('word) { group => group.size }
    .write(Tsv(args("output")))
}
With the type-safe (typed) API, the same job becomes:
/**
 * Word-count job written against the typed API.
 *
 * Reads text lines from the path given by the `input` argument, splits each
 * line on runs of whitespace, groups identical tokens, counts each group, and
 * writes `(word, count)` pairs as typed TSV to the path given by the `output`
 * argument.
 */
class WordCountJob(args: Args) extends Job(args) {
  // Build the typed pipe explicitly rather than relying on an implicit
  // source-to-TypedPipe conversion being imported.
  val lines : TypedPipe[String] = TypedPipe.from(TextLine(args("input")))

  // Typed flatMap takes only a function; field selectors such as
  // 'line -> 'word belong to the fields-based API and do not apply here.
  val words = lines.flatMap { line => line.split("\\s+") }

  // Group on the word itself so each distinct word becomes a key.
  val groupedWord = words.groupBy(identity)

  // Number of occurrences per key.
  val countedWords = groupedWord.size

  countedWords.write(TypedTsv[(String,Long)](args("output")))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment