Skip to content

Instantly share code, notes, and snippets.

@ishassan
Created April 6, 2016 17:03
Show Gist options
  • Save ishassan/6f03a2c1a42d8439cec4000faec14f0d to your computer and use it in GitHub Desktop.
Save ishassan/6f03a2c1a42d8439cec4000faec14f0d to your computer and use it in GitHub Desktop.
Creates some dummy data from Spark and inserts it into a Hive table
package testpackage
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.{SparkConf, SparkContext }
import org.apache.spark.sql.hive.HiveContext
/**
 * Spark job that builds a four-row, two-column string DataFrame in memory and
 * materializes it as the Hive table `ishassan.test_table`.
 */
object TestJob {

  /**
   * Entry point: creates the Spark and Hive contexts, builds the dummy data,
   * and (re)creates `ishassan.test_table` from it.
   *
   * The SparkContext is always stopped before returning, including when one
   * of the Hive statements throws.
   *
   * @param args command-line arguments; not read by this job
   */
  def main(args: Array[String]): Unit = {
    val config = new SparkConf().setAppName("Test App")
    val sc = new SparkContext(config)
    try {
      val hiveContext = new HiveContext(sc)

      // Two nullable string columns describing the dummy rows below.
      val schema = StructType(Array(
        StructField("sparkKey", StringType, nullable = true),
        StructField("sparkValue", StringType, nullable = true)
      ))

      val rdd = sc.parallelize(
        Seq(Row("sk1", "sv1"), Row("sk2", "sv2"), Row("sk3", "sv3"), Row("sk4", "sv4"))
      )
      val dataFrame = hiveContext.createDataFrame(rdd, schema)

      // Register the DataFrame under a name so the CTAS statement below can select from it.
      dataFrame.registerTempTable("test_table")

      // Drop-then-create so the job can be re-run without failing on an existing table.
      // NOTE(review): assumes the Hive database `ishassan` already exists — confirm.
      hiveContext.sql("drop table if exists ishassan.test_table")
      hiveContext.sql("create table ishassan.test_table as select * from test_table")
    } finally {
      // Release the context even when a statement above fails.
      sc.stop()
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment