Created
August 4, 2015 00:08
-
-
Save anonymous/f02bd79528ac75f57ae8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Natural join of two DataFrames: joins on every column name the two frames share and
 * returns each shared column only once.
 *
 * Output column order is: shared columns (in `left`'s order), then the remaining columns
 * of `left`, then the remaining columns of `right`.
 *
 * For right/full outer joins the shared columns are taken as `coalesce(left, right)`, so
 * rows that exist only in `right` keep their key values instead of showing NULL.
 *
 * NOTE(review): if `left` and `right` are derived from the same DataFrame, `left(c)` and
 * `right(c)` may resolve to the same attribute; alias the inputs first in that case.
 *
 * @param left     left-hand DataFrame
 * @param right    right-hand DataFrame
 * @param joinType join type string passed through unchanged to `DataFrame.join`
 * @return the joined DataFrame without duplicated shared columns
 * @throws IllegalArgumentException if the two DataFrames have no column name in common
 */
def natjoin(left: DataFrame, right: DataFrame, joinType: String): DataFrame = {
  import org.apache.spark.sql.functions.coalesce

  val leftCols = left.columns
  val rightCols = right.columns

  // Column names present on both sides; these become the join keys.
  val commonCols = leftCols.toSet intersect rightCols.toSet

  // TODO: Common columns must equal publishing keys. Else throw exception.
  // Fail with a descriptive message instead of the opaque error an empty `reduce` raises.
  require(
    commonCols.nonEmpty,
    s"natjoin requires at least one common column; left has [${leftCols.mkString(", ")}], " +
      s"right has [${rightCols.mkString(", ")}]"
  )

  // Equate each shared column across both sides and AND the comparisons together.
  // `reduce` is safe here: `commonCols` was just checked to be non-empty.
  val joinCondition = commonCols.map(name => left(name) === right(name)).reduce(_ && _)

  // Join types under which a result row may have no matching `left` row. Case and
  // underscores are ignored so that e.g. "right_outer" and "RightOuter" are both recognised.
  val normalizedType = joinType.toLowerCase(java.util.Locale.ROOT).replace("_", "")
  val leftSideMayBeNull =
    Set("outer", "full", "fullouter", "right", "rightouter").contains(normalizedType)

  // Shared columns appear once. Prefer the left value, falling back to the right value
  // only for join types where the left side can be NULL.
  val sharedOut = leftCols.collect {
    case name if commonCols.contains(name) =>
      if (leftSideMayBeNull) coalesce(left(name), right(name)).as(name) else left(name)
  }
  val leftOnlyOut = leftCols.collect { case name if !commonCols.contains(name) => left(name) }
  val rightOnlyOut = rightCols.collect { case name if !commonCols.contains(name) => right(name) }
  val selection = sharedOut ++ leftOnlyOut ++ rightOnlyOut

  left.join(right, joinCondition, joinType).select(selection: _*)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment