// Intended to be run in spark-shell with the Hudi Spark bundle on the classpath;
// `spark` refers to the active SparkSession.
import org.apache.spark.sql.SaveMode._
import spark.implicits._

// Simulate a dimension table for stocks.
case class Company(symbol: String, name: String, yearFounded: Int)

val records = Seq(
  Company("MSFT", "Microsoft", 1975)
)
records.toDF("symbol", "name", "yearFounded").createOrReplaceTempView("companies")
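// `incrementalDF` below is assumed to hold the changed stock records produced by
// an earlier step that is not included in this gist. A minimal sketch of one way
// to produce it is a Hudi incremental query; the source path /tmp/hudi/stocks
// and the begin instant are placeholders, not values from the original gist.
val beginTime = "000" // placeholder: fetch every commit after this instant time
val incrementalDF = spark.read.format("hudi").
  option("hoodie.datasource.query.type", "incremental").
  option("hoodie.datasource.read.begin.instanttime", beginTime).
  load("/tmp/hudi/stocks")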
// Join the changed records with the dimension table.
incrementalDF.createOrReplaceTempView("changedStocks")

val result = spark.sql("""
  |SELECT s.*, c.name, c.yearFounded
  |FROM changedStocks s
  |JOIN companies c
  |ON s.symbol = c.symbol
  """.stripMargin)
// Write the joined result out as a wide Hudi table, keyed by symbol, partitioned
// by date, and deduplicated on ts. This assumes the changedStocks records carry
// date and ts columns, which are pulled through by s.* in the join above.
val wideTablePath = "/tmp/hudi/stocks_wide"
result.write.format("hudi").
  option("hoodie.datasource.write.recordkey.field", "symbol").
  option("hoodie.datasource.write.partitionpath.field", "date").
  option("hoodie.datasource.write.precombine.field", "ts").
  option("hoodie.table.name", "stocks_wide").
  mode(Append).
  save(wideTablePath)
// Query the wide table to see all of the columns, including the joined-in
// company name and yearFounded.
spark.read.format("hudi").load(wideTablePath).show()
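// A follow-up sketch, not part of the original gist: the wide table is itself a
// Hudi table, so downstream consumers can read only the rows changed since a
// given commit with an incremental query. The begin instant is a placeholder.
val wideBeginTime = "000" // placeholder: fetch every commit after this instant time
val wideIncrementalDF = spark.read.format("hudi").
  option("hoodie.datasource.query.type", "incremental").
  option("hoodie.datasource.read.begin.instanttime", wideBeginTime).
  load(wideTablePath)
wideIncrementalDF.select("symbol", "name", "yearFounded").show()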