Skip to content

Instantly share code, notes, and snippets.

Created August 4, 2015 00:08
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/f02bd79528ac75f57ae8 to your computer and use it in GitHub Desktop.
Save anonymous/f02bd79528ac75f57ae8 to your computer and use it in GitHub Desktop.
/**
 * Natural join of two DataFrames: rows are matched on equality of every
 * column name the two frames share, and each shared column appears only once
 * in the result.
 *
 * Output column order is: shared columns (in `left`'s order), then the
 * remaining `left` columns, then the remaining `right` columns.
 *
 * @param left     left-hand DataFrame
 * @param right    right-hand DataFrame
 * @param joinType join type name, passed through unchanged to `DataFrame.join`
 * @return the joined DataFrame with the shared columns de-duplicated
 * @throws UnsupportedOperationException if the frames have no column name in
 *                                       common, so no join condition can be built
 */
def natjoin(left: DataFrame, right: DataFrame, joinType: String): DataFrame = {
  import org.apache.spark.sql.functions.coalesce

  val leftNames = left.columns
  val rightNames = right.columns
  val leftNameSet = leftNames.toSet
  val rightNameSet = rightNames.toSet

  // Partition on the ordered arrays (not on a Set) so column order is deterministic.
  val (commonCols, leftOnlyCols) = leftNames.partition(rightNameSet.contains)
  val rightOnlyCols = rightNames.filterNot(leftNameSet.contains)

  // TODO: Common columns must equal publishing keys. Else throw exception.
  // Reducing an empty collection would fail with an opaque "empty.reduce";
  // raise the same exception type with a message that explains the cause.
  if (commonCols.isEmpty) {
    throw new UnsupportedOperationException(
      "natjoin requires at least one common column; " +
        s"left has [${leftNames.mkString(", ")}], right has [${rightNames.mkString(", ")}]")
  }

  // Equate each shared column across both frames and AND the comparisons together.
  val joinCondition = commonCols
    .map(name => left(name) === right(name))
    .reduce(_ && _)

  // Join-type names are compared after lower-casing and dropping underscores,
  // so e.g. "right_outer" and "rightouter" are treated alike.
  val normalizedType = joinType.toLowerCase(java.util.Locale.ROOT).replace("_", "")
  val rightRowsMayBeUnmatched =
    Set("outer", "full", "fullouter", "right", "rightouter").contains(normalizedType)

  // For joins that keep unmatched right-hand rows, the left copy of a shared
  // column is null on those rows; fall back to the right copy so the key value
  // is not lost. For other join types the left copy is used as before.
  val sharedColumns = commonCols.map { name =>
    if (rightRowsMayBeUnmatched) coalesce(left(name), right(name)).as(name)
    else left(name)
  }

  // Select each shared column once, followed by the columns unique to each side.
  val selection =
    sharedColumns ++
      leftOnlyCols.map(name => left(name)) ++
      rightOnlyCols.map(name => right(name))

  left.join(right, joinCondition, joinType).select(selection: _*)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment