Skip to content

Instantly share code, notes, and snippets.

@mmmika
Forked from cotdp/CandyCrushExample.scala
Created December 1, 2018 23:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mmmika/f16a6171ab54501577a3757a8e821177 to your computer and use it in GitHub Desktop.
Save mmmika/f16a6171ab54501577a3757a8e821177 to your computer and use it in GitHub Desktop.
import org.apache.spark._
import org.apache.spark.SparkContext._
import org.json4s.jackson.JsonMethods
import org.json4s.jackson.JsonMethods._
import org.json4s.JsonAST._
import org.json4s.DefaultFormats
/**
 * Spark job that reports, per gender, the average Candy Crush Saga level
 * mentioned in activity titles and the number of such activities.
 *
 * The input file is read as text with one JSON document per line.
 */
object CandyCrushExample {
  import scala.util.Try

  // We want to extract the level number from
  // "Yay, I completed level 576 in Candy Crush Saga!".
  // The actual text changes with the user's language, but taking the LAST run
  // of digits in the title works.
  private val pattern = """(\d+)""".r

  /**
   * Tells whether a record's `facebook.application` field is the string
   * "Candy Crush Saga".
   *
   * Matching on the AST node (instead of extracting a String) means a record
   * without that field is simply "not a match" rather than a job failure.
   *
   * @param json one parsed input record
   * @return true only when the field is a JSON string equal to "Candy Crush Saga"
   */
  private def isCandyCrush(json: JValue): Boolean =
    json \ "facebook" \ "application" match {
      case JString(application) => application == "Candy Crush Saga"
      case _                    => false
    }

  /**
   * Returns the last run of digits in `text` as an Int.
   *
   * @param text text to scan
   * @return the last number found, or 0 when there are no digits or the last
   *         run does not fit in an Int (callers drop level 0)
   */
  private def lastNumber(text: String): Int =
    pattern
      .findAllIn(text)
      .toList
      .lastOption
      .flatMap(digits => Try(digits.toInt).toOption)
      .getOrElse(0)

  /**
   * Runs the job on a local 8-thread Spark context and prints one line per
   * gender to standard output.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext("local[8]", "CandyCrushExample")
    // Stop the context even when parsing or the job itself fails.
    try {
      // Produces a RDD[String]
      val lines = sc.textFile("facebook-2014-05-19.json")

      val totalsByGender = lines
        // Parse the JSON
        .map(line => parse(line))
        // Keep only 'Candy Crush Saga' activity
        .filter(json => isCandyCrush(json))
        .map { json =>
          // Extract the 'level' or default to zero
          val level = lastNumber(compact(json \ "interaction" \ "title"))
          // `compact` renders the JSON value, so string genders keep their
          // quotes and a missing field becomes an empty string (see the
          // sample results below).
          val gender = compact(json \ "demographic" \ "gender")
          // Pair of (gender: String, (level: Int, count: Int))
          (gender, (level, 1))
        }
        // Filter out entries with a level of zero
        .filter { case (_, (level, _)) => level > 0 }
        // Sum the levels and counts so we can average later
        .reduceByKey { case ((levelsA, countA), (levelsB, countB)) =>
          (levelsA + levelsB, countA + countB)
        }
        .collect()

      // Print the results
      totalsByGender.foreach { case (gender, (levelSum, count)) =>
        // Int / Int: the average is truncated to a whole level.
        val average = levelSum / count
        println(s"$gender: average=$average, count=$count")
      }
      /* Results:
      "female": average=114, count=15422
      "male": average=104, count=14727
      "mostly_male": average=97, count=2824
      "mostly_female": average=99, count=1934
      "unisex": average=113, count=2674
      : average=93, count=11023
      */
    } finally {
      sc.stop()
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment