Created
February 23, 2021 19:33
-
-
Save anthony-cros/2ceba1be56bd99a8d4bafd2b9f52b9b3 to your computer and use it in GitHub Desktop.
Reproduces the go-to Word Count example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gallia._ | |
// =========================================================================== | |
/**
 * Reproduces the go-to "word count" example with Gallia, followed by a variation that
 * counts words per word length.
 *
 * See http://spark.apache.org/examples.html and
 * https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html
 *
 * Both pipelines read the same input file and print their result as JSON lines.
 */
object WordCount {

  /** Path of the input file shared by both pipelines. */
  private val InputPath = "/tmp/sentences.list"

  // ---------------------------------------------------------------------------
  /**
   * Runs the two pipelines in order, printing a heading before each result.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // INPUT: "Hello World Bye World\nHello Hadoop Goodbye Hadoop"

    // ---------------------------------------------------------------------------
    println("word count:")

    wordStream
      .generate(_count).from('word).using(_ => 1) // every word occurrence contributes 1 ...
      .sum(_count).by('word)                      // ... which is then summed up per distinct word
      .printJsonl()

    /*
      OUTPUT:
        {"word":"Hello","_count":2}
        {"word":"World","_count":2}
        {"word":"Bye","_count":1}
        {"word":"Hadoop","_count":2}
        {"word":"Goodbye","_count":1}
    */

    // ---------------------------------------------------------------------------
    println("\ncount word length:") // because why not

    wordStream
      .generate('word_length).from(_.string('word)).using(_.size)
      .count('word ~> _count).by('word_length) // number of words per length, stored under _count
      .printJsonl()

    /*
      OUTPUT:
        {"word_length":5,"_count":4}
        {"word_length":3,"_count":1}
        {"word_length":6,"_count":2}
        {"word_length":7,"_count":1}
    */
  }

  // ---------------------------------------------------------------------------
  /**
   * Common front half of both pipelines: streams the lines of [[InputPath]], splits the
   * `_line` field on single spaces into `word`, then flattens by `word`.
   *
   * Declared as a `def` so that each pipeline builds its own chain, exactly as when the
   * steps were written out twice.
   *
   * NOTE(review): result type is left to inference because the Gallia head type is not
   * visible from this file -- annotate it once confirmed.
   *
   * Also see https://github.com/galliaproject/gallia-core/blob/init/README.md#spark-rdds
   */
  private def wordStream =
    InputPath
      .stream(_.lines)
      .split(_line ~> 'word).by(" ")
      .flattenBy('word)

}
// =========================================================================== |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment