Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Generate a Hive CREATE TABLE DDL statement from a Spark DataFrame's schema
import org.apache.spark.sql.DataFrame
/** Builds a Hive `CREATE TABLE` DDL statement from a DataFrame's schema.
  *
  * Each top-level column becomes one line of the form `` `name` TYPE ``,
  * where TYPE is Spark's `DataType.simpleString` upper-cased (nested structs
  * render as `STRUCT<...>` with their inner field names inlined).
  *
  * @param dataFrame the DataFrame whose schema is converted
  * @param tableName the Hive table name to emit in the DDL
  * @return a `CREATE TABLE tableName (...)` string
  */
def dataFrameToDDL(dataFrame: DataFrame, tableName: String): String = {
  val columns = dataFrame.schema.map { field =>
    // Backtick-quote the column name: Hive rejects unquoted reserved words,
    // and the example below uses `type` as a column name.
    " `" + field.name + "` " + field.dataType.simpleString.toUpperCase
  }
  s"CREATE TABLE $tableName (\n${columns.mkString(",\n")}\n)"
}
// Demo: build a one-row DataFrame from nested case classes and generate its DDL.
// Requires an active SparkSession named `spark`; the import enables Seq(...).toDF().
import spark.sqlContext.implicits._
// Example of hierarchical structure: Event -> Device -> Model
// `type` must be backtick-escaped because it is a Scala keyword.
case class Model(`type`: String)
case class Device(`type`: String, model: Model, serial: Long)
case class Event(device: Device, timestamp: Long)
// Single sample row; 1525354897L looks like a Unix epoch timestamp —
// NOTE(review): units (seconds vs millis) are not established here, confirm with callers.
val df = Seq(
Event(Device("Android", Model("Huawei"), 1), 1525354897L)).toDF()
// Returns the CREATE TABLE string for a table named "events".
dataFrameToDDL(df, "events")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment