Last active
May 22, 2017 05:22
-
-
Save devasat/0bacb85b1c61ccf73094bdc4fbc86e93 to your computer and use it in GitHub Desktop.
Querying JSON data using Spark SQL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ "id" : { "year" : 2024, "month" : 10 }, "total" : 148 } | |
{ "id" : { "year" : 2037, "month" : 1 }, "total" : 2 } | |
{ "id" : { "year" : 2125, "month" : 7 }, "total" : 46 } | |
{ "id" : { "year" : 2033, "month" : 3 }, "total" : 7 } | |
{ "id" : { "year" : 2027, "month" : 1 }, "total" : 8 } | |
{ "id" : { "year" : 2026, "month" : 10 }, "total" : 5 } | |
{ "id" : { "year" : 2066, "month" : 3 }, "total" : 28 } | |
{ "id" : { "year" : 1980, "month" : 1 }, "total" : 5 } | |
{ "id" : { "year" : 2023, "month" : 1 }, "total" : 1 } | |
{ "id" : { "year" : 2095, "month" : 9 }, "total" : 1 } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
spark-shell --master local | |
val sqlContext = new org.apache.spark.sql.SQLContext(sc) | |
val eventYearMonth=sqlContext.jsonFile("file:///tmp/event-year-month.json") | |
eventYearMonth.printSchema | |
eventYearMonth.registerTempTable("eventYearMonth") | |
sqlContext.sql("select id.month, id.year, total from eventYearMonth ").collect().foreach(println) | |
sqlContext.sql("select sum(total), count(*) from eventYearMonth where id.year=2015 or id.year=2016 ").collect().foreach(println) | |
sqlContext.sql("select sum(total), count(*) from eventYearMonth where id.year!=2015 and id.year!=2016 ").collect().foreach(println) | |
sqlContext.sql("select id.year, id.month, total from eventYearMonth where id.year=2015 or id.year=2016 order by id.year, id.month ").collect().foreach(println) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment