Skip to content

Instantly share code, notes, and snippets.

@saisgit
Last active July 25, 2021 16:21
Show Gist options
  • Save saisgit/7523d3412e4dc5634dc2a10b5d6f0a9d to your computer and use it in GitHub Desktop.
Save saisgit/7523d3412e4dc5634dc2a10b5d6f0a9d to your computer and use it in GitHub Desktop.
// Method to generate Boundary Query
def buildBoundaryQuery(column: String, tableName: String): String = {
s"(SELECT MIN(${column}) AS lowerBound, MAX(${column}) AS upperBound FROM ${tableName}) bq"
}
val bq = buildBoundaryQuery("id", tableName)
// Boundary Read Options
val boundaryReadOptions = Map(
"driver" -> JDBC_DRIVER,
"url" -> JDBC_URL,
"user" -> JDBC_USERNAME,
"password" -> JDBC_PASSWORD,
"dbtable" -> bq
)
// Create DataFrame to execute Boundary Query on Source
val boundaryDF = (
spark.read
.options(boundaryReadOptions)
.format("jdbc")
.load
)
// Extract Lower Bound and Upper Bound Values to pass it to readOptions
import org.apache.spark.sql.functions.col
val bounds: (String, String) = (
boundaryDF
.select(col("lowerBound").cast(StringType).alias("lowerBound"), col("upperBound").cast(StringType).alias("upperBound"))
.map(row => (row.getString(0), row.getString(1)))
.first
)
val lowerBound: String = bounds._1
val upperBound: String = bounds._2
println("lowerBound and upperBound for 'id' column")
println(s"lowerBound => ${lowerBound}")
println(s"upperBound => ${upperBound}\n")
/*lowerBound and upperBound for 'id' column
lowerBound => 201
upperBound => 4079
import org.apache.spark.sql.functions.col
bounds: (String, String) = (201,4079)
lowerBound: String = 201
upperBound: String = 4079*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment