Skip to content

Instantly share code, notes, and snippets.

@lonly197
Created August 29, 2018 16:08
Show Gist options
  • Save lonly197/5b1515743e82f6c7bf506b97c8c0b580 to your computer and use it in GitHub Desktop.
Save lonly197/5b1515743e82f6c7bf506b97c8c0b580 to your computer and use it in GitHub Desktop.
Union two DataFrames with different numbers of columns in Spark
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions._
/**
 * Unions two DataFrames whose column sets may differ.
 *
 * The result has every column that appears in either input. For rows that come
 * from an input lacking a given column, that column is filled with `null`.
 *
 * @param df1 first DataFrame
 * @param df2 second DataFrame
 * @return a DataFrame holding the rows of both inputs over the combined columns
 */
def concat(df1: DataFrame, df2: DataFrame): DataFrame = {
  val cols1 = df1.columns.toSet
  val cols2 = df2.columns.toSet
  // Combined column names in a fixed order: df1's columns first, then the ones
  // only df2 has. `union` matches columns by position, not by name, so both
  // projections below must list the columns in exactly the same order.
  val total = (df1.columns ++ df2.columns).distinct.toList

  // Builds the projection of one frame onto `allCols`, substituting a null
  // literal (aliased to the column name) for columns that frame does not have.
  def expr(myCols: Set[String], allCols: List[String]) =
    allCols.map { name =>
      if (myCols.contains(name)) col(name) else lit(null).as(name)
    }

  // Previously only df1's projection was returned, silently dropping df2's rows.
  df1.select(expr(cols1, total): _*).union(df2.select(expr(cols2, total): _*))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment