Skip to content

Instantly share code, notes, and snippets.

import org.apache.spark.SparkContext
// Hive-backed SQL context built on `sc`, which must already be in scope
// (it is not defined in this snippet -- presumably the spark-shell context).
val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc)

// Prefix prepended to the sample-data path; empty here, so the path below is
// used exactly as written.
val namenode = ""

// No explicit format is given, so the reader's configured default data source
// is used to load the sample data.
val dataFrame1 = sqlContext.read.load(s"$namenode/share/SampleData/AirlineDemoSmallParquet")

// Give the DataFrame a name that SQL statements can refer to.
dataFrame1.registerTempTable("AirlineDemoSmallTempTable")

// Create a Hive table from the temp table's rows (create-table-as-select).
sqlContext.sql("create table AirlineDemoSmallHive as select * from AirlineDemoSmallTempTable")
@eqbalz
eqbalz / merge_col.R
Last active November 7, 2016 21:12
Sample code to merge two xdfs with different column names
# Column specification for an xdf file whose 'age' column carries a different
# name ("person.age") than in the original sample. Each entry is keyed by the
# source column name; `newName` gives the name the column should end up with.
colInfo <- list(
  RowNum = list(type = "integer"),
  # the one column that is actually renamed
  age = list(
    newName = "person.age",
    type    = "factor",
    levels  = c("17-20", "21-24", "25-29", "30-34", "35-39", "40-49", "50-59", "60+")
  ),
  car.age = list(
    type   = "factor",
    levels = c("0-3", "4-7", "8-9", "10+")
  ),
  type = list(
    type   = "factor",
    levels = c("A", "B", "C", "D")
  ),
  # newName matches the existing name for these two, so they keep their names
  cost   = list(newName = "cost", type = "float32"),
  number = list(newName = "number", type = "float32")
)