Skip to content

Instantly share code, notes, and snippets.

@skp33
Created January 25, 2020 19:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save skp33/65121299b3508ab0135b36146fe14853 to your computer and use it in GitHub Desktop.
Save skp33/65121299b3508ab0135b36146fe14853 to your computer and use it in GitHub Desktop.
Generate a Hive CREATE EXTERNAL TABLE statement from a Dataset schema
import java.lang.reflect.Method
import java.net.URI
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
import org.apache.spark.sql.execution.command.ShowCreateTableCommand
import org.apache.spark.sql.types.StructType
/**
 * Produces a `CREATE TABLE` statement for an external table described by `schema`.
 *
 * A [[CatalogTable]] of type `EXTERNAL` is assembled in memory from the arguments and
 * passed to the `showCreateHiveTable` method declared on [[ShowCreateTableCommand]].
 * That method is looked up and invoked through Java reflection (it is made accessible
 * first, so it is presumably not public).
 *
 * The output format (`HiveIgnoreKeyTextOutputFormat`) and SerDe (`ParquetHiveSerDe`)
 * are hard-coded; only the input format is supplied by the caller.
 *
 * NOTE(review): this relies on a Spark-internal method name and signature, which may
 * differ between Spark versions -- confirm against the version in use.
 *
 * @param spark           session whose SQL parser is used to parse `tableName`
 * @param schema          schema placed on the generated table definition
 * @param tableName       table identifier text, parsed by the session's SQL parser
 * @param path            table location; parsed with `new URI(path)`, so it must be
 *                        syntactically valid as a URI
 * @param inputFormat     optional input format class name for the storage descriptor
 * @param partitionColumn names of the partition columns
 * @param properties      storage properties; empty by default
 * @return the string returned by `showCreateHiveTable` for the assembled table
 * @throws java.net.URISyntaxException if `path` is not a valid URI
 * @throws java.lang.NoSuchMethodException if `showCreateHiveTable(CatalogTable)` is not
 *                        declared on the command class
 * @throws java.lang.reflect.InvocationTargetException if the reflectively invoked
 *                        method itself throws
 */
def showCreateTableCommand(
    spark: SparkSession,
    schema: StructType,
    tableName: String,
    path: String,
    inputFormat: Option[String],
    partitionColumn: Seq[String],
    properties: Map[String, String] = Map.empty[String, String]): String = {
  // Parse the identifier first so that a bad table name is reported before a bad path.
  val tableId = spark.sessionState.sqlParser.parseTableIdentifier(tableName)

  // Storage descriptor: location, input format, fixed output format and SerDe,
  // uncompressed, plus the caller's properties.
  val storage = CatalogStorageFormat(
    Some(new URI(path)),
    inputFormat,
    Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"),
    Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"),
    false,
    properties)

  val tableDefinition = CatalogTable(
    tableId,
    CatalogTableType.EXTERNAL,
    storage,
    schema,
    partitionColumnNames = partitionColumn)

  val command = ShowCreateTableCommand(tableId)

  // Look up the DDL renderer reflectively and open it up for invocation.
  val renderDdl: Method =
    command.getClass.getDeclaredMethod("showCreateHiveTable", classOf[CatalogTable])
  renderDdl.setAccessible(true)

  renderDdl.invoke(command, tableDefinition).asInstanceOf[String]
}
// Example usage: the angle-bracket strings are placeholders to be replaced with real
// values; `spark` and `df` are expected to already be in scope.
val tcstmt = showCreateTableCommand(
  spark = spark,
  schema = df.schema,
  tableName = "<table name>",
  path = "<location of data for external table>",
  inputFormat = Some("org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat"),
  partitionColumn = Seq("<partition column names>"))

// Print the generated statement.
println(tcstmt)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment