nfarah86/create_table.scala

## create_table.scala
import org.apache.spark.sql.SaveMode._

// Location of the Hudi table that both writes below target.
val basePath = "/tmp/hudi/stocks"

// First batch, read with Spark's default JSON reader settings.
val stocksDF1 = spark.read.json("docker/demo/data/batch_1.json")

// Second batch, read with the "multiline" option enabled and truncated to at most one row.
val stocksDF2 = spark.read.option("multiline", "true").json("docker/demo/data/batch_2.json").limit(1)

// Hudi write options shared by every write to this table. Keeping them in one
// place guarantees the follow-up write uses the same table name, record key,
// partition path and precombine field as the initial load.
val hudiWriteOptions = Map(
  "hoodie.datasource.write.recordkey.field" -> "symbol",
  "hoodie.datasource.write.partitionpath.field" -> "date",
  "hoodie.datasource.write.precombine.field" -> "ts",
  "hoodie.table.name" -> "stocks"
)

// Initial load: SaveMode.Overwrite replaces whatever already exists at basePath.
// Trailing dots keep each call chain pasteable line-by-line into spark-shell.
stocksDF1.write.format("hudi").
  options(hudiWriteOptions).
  mode(Overwrite).
  save(basePath)

// Follow-up write: SaveMode.Append writes the second batch into the existing table.
stocksDF2.write.format("hudi").
  options(hudiWriteOptions).
  mode(Append).
  save(basePath)
	import org.apache.spark.sql.SaveMode._

	// NOTE(review): these definitions also appear earlier in this file; if both
	// copies are compiled as a single unit the repeated `val`s would conflict.
	// Confirm whether this second copy is intentional.

	// Location of the Hudi table that both writes below target.
	val basePath = "/tmp/hudi/stocks"

	// First batch, read with Spark's default JSON reader settings.
	val stocksDF1 = spark.read.json("docker/demo/data/batch_1.json")

	// Second batch, read with the "multiline" option enabled and truncated to at most one row.
	val stocksDF2 = spark.read.option("multiline", "true").json("docker/demo/data/batch_2.json").limit(1)

	// Hudi write options shared by every write to this table, so the follow-up
	// write uses the same table name, record key, partition path and precombine
	// field as the initial load.
	val hudiWriteOptions = Map(
		"hoodie.datasource.write.recordkey.field" -> "symbol",
		"hoodie.datasource.write.partitionpath.field" -> "date",
		"hoodie.datasource.write.precombine.field" -> "ts",
		"hoodie.table.name" -> "stocks"
	)

	// Initial load: SaveMode.Overwrite replaces whatever already exists at basePath.
	// Trailing dots keep each call chain pasteable line-by-line into spark-shell.
	stocksDF1.write.format("hudi").
		options(hudiWriteOptions).
		mode(Overwrite).
		save(basePath)

	// Follow-up write: SaveMode.Append writes the second batch into the existing table.
	stocksDF2.write.format("hudi").
		options(hudiWriteOptions).
		mode(Append).
		save(basePath)