Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save anthony-cros/f6d82744523349a65bc86598c79cabdc to your computer and use it in GitHub Desktop.
Save anthony-cros/f6d82744523349a65bc86598c79cabdc to your computer and use it in GitHub Desktop.
import gallia._
import aptus._ // for divideBy and significantFigures utilities
// ===========================================================================
object ReshapingDataWithPivotInSparkArticle {
// reproduces first query in https://databricks.com/blog/2016/02/09/reshaping-data-with-pivot-in-apache-spark.html
// ---------------------------------------------------------------------------
// t210224102310 - not actually implemented yet, but will also use
// https://github.com/galliaproject/gallia-core/blob/init/src/main/scala/gallia/atoms/AtomsIX.scala#L110 for local runs
implicit class Connection_(u: java.sql.Connection) { def stream(tableName: String): HeadZ = HeadZ.Dummy }
val conn: java.sql.Connection = null
// ===========================================================================
def main(args: Array[String]): Unit = {
/*
sql("""select *, concat('Q', d_qoy) as qoy
from store_sales
join date_dim on ss_sold_date_sk = d_date_sk
join item on ss_item_sk = i_item_sk""")
.groupBy("i_category")
.pivot("qoy")
.agg(round(sum("ss_sales_price")/1000000,2))
.show)
*/
// ---------------------------------------------------------------------------
// note: not tested (TODO: t210224102851)
conn // "jdbc://.../"
.stream("store_sales")
.generate("qoy").from("Q", "d_qoy").using(_ + " " + _)
.innerJoin(
conn.stream("date_dim") -> ("ss_sold_date_sk" <~> "d_date_sk"),
conn.stream("item") -> ("ss_item_sk" <~> "i_item_sk"))
.pivot(_.double("ss_sales_price"))
.using { _.sum.divideBy(1000000).significantFigures(2) /* uses aptus utils */ }
.rows("i_category")
.column("qoy").asNewKeys("Q1", "Q2", "Q3", "Q4")
.printJsonl()
}
}
// ===========================================================================
@anthony-cros
Copy link
Author

changed columns to column to reflect t210303101932 (commit 719bcfd)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment