Skip to content

Instantly share code, notes, and snippets.

@anthony-cros
Created February 23, 2021 19:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anthony-cros/2ceba1be56bd99a8d4bafd2b9f52b9b3 to your computer and use it in GitHub Desktop.
Save anthony-cros/2ceba1be56bd99a8d4bafd2b9f52b9b3 to your computer and use it in GitHub Desktop.
Reproduces the go-to Word Count example
import gallia._
// ===========================================================================
object WordCount { // see http://spark.apache.org/examples.html and https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html

  /** Path both pipelines read from.
    *
    * The sample outputs documented below correspond to this file containing:
    * "Hello World Bye World\nHello Hadoop Goodbye Hadoop"
    */
  private val InputPath = "/tmp/sentences.list"

  // ---------------------------------------------------------------------------
  /** Entry point: prints the per-word counts, then the per-word-length counts.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    println("word count:")
    printWordCounts()

    println("\ncount word length:") // because why not
    printWordLengthCounts()
  }

  // ---------------------------------------------------------------------------
  /** Shared front half of both pipelines: streams the input lines, splits each
    * line on a single space into the `word` field, then flattens by `word`.
    *
    * Deliberately a `def`, so every caller builds its own pipeline from scratch.
    */
  private def words =
    InputPath
      .stream(_.lines) // also see https://github.com/galliaproject/gallia-core/blob/init/README.md#spark-rdds
      .split(_line ~> 'word).by(" ")
      .flattenBy('word)

  // ---------------------------------------------------------------------------
  /** Attaches a constant count of 1 to every word, sums those counts by word and
    * prints the result as JSON lines.
    */
  private def printWordCounts(): Unit = {
    words
      .generate(_count).from('word).using(_ => 1)
      .sum(_count).by('word)
      .printJsonl()
    /*
    OUTPUT:
    {"word":"Hello","_count":2}
    {"word":"World","_count":2}
    {"word":"Bye","_count":1}
    {"word":"Hadoop","_count":2}
    {"word":"Goodbye","_count":1}
    */
  }

  // ---------------------------------------------------------------------------
  /** Derives `word_length` from each word's size, counts words by that length
    * and prints the result as JSON lines.
    */
  private def printWordLengthCounts(): Unit = {
    words
      .generate('word_length).from(_.string('word)).using(_.size)
      .count('word ~> _count).by('word_length)
      .printJsonl()
    /*
    OUTPUT:
    {"word_length":5,"_count":4}
    {"word_length":3,"_count":1}
    {"word_length":6,"_count":2}
    {"word_length":7,"_count":1}
    */
  }
}
// ===========================================================================
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment