Last active
March 3, 2021 17:18
-
-
Save anthony-cros/f6d82744523349a65bc86598c79cabdc to your computer and use it in GitHub Desktop.
Reproduces first query in https://databricks.com/blog/2016/02/09/reshaping-data-with-pivot-in-apache-spark.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gallia._ | |
import aptus._ // for divideBy and significantFigures utilities | |
// =========================================================================== | |
object ReshapingDataWithPivotInSparkArticle { | |
// reproduces first query in https://databricks.com/blog/2016/02/09/reshaping-data-with-pivot-in-apache-spark.html | |
// --------------------------------------------------------------------------- | |
// t210224102310 - not actually implemented yet, but will also use | |
// https://github.com/galliaproject/gallia-core/blob/init/src/main/scala/gallia/atoms/AtomsIX.scala#L110 for local runs | |
implicit class Connection_(u: java.sql.Connection) { def stream(tableName: String): HeadZ = HeadZ.Dummy } | |
val conn: java.sql.Connection = null | |
// =========================================================================== | |
def main(args: Array[String]): Unit = { | |
/* | |
sql("""select *, concat('Q', d_qoy) as qoy | |
from store_sales | |
join date_dim on ss_sold_date_sk = d_date_sk | |
join item on ss_item_sk = i_item_sk""") | |
.groupBy("i_category") | |
.pivot("qoy") | |
.agg(round(sum("ss_sales_price")/1000000,2)) | |
.show) | |
*/ | |
// --------------------------------------------------------------------------- | |
// note: not tested (TODO: t210224102851) | |
conn // "jdbc://.../" | |
.stream("store_sales") | |
.generate("qoy").from("Q", "d_qoy").using(_ + " " + _) | |
.innerJoin( | |
conn.stream("date_dim") -> ("ss_sold_date_sk" <~> "d_date_sk"), | |
conn.stream("item") -> ("ss_item_sk" <~> "i_item_sk")) | |
.pivot(_.double("ss_sales_price")) | |
.using { _.sum.divideBy(1000000).significantFigures(2) /* uses aptus utils */ } | |
.rows("i_category") | |
.column("qoy").asNewKeys("Q1", "Q2", "Q3", "Q4") | |
.printJsonl() | |
} | |
} | |
// =========================================================================== |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
changed
columns
tocolumn
to reflect t210303101932 (commit 719bcfd)