Skip to content

Instantly share code, notes, and snippets.

@soonraah
Created June 14, 2021 03:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save soonraah/62f4bdb2d732ac813fb6f299a39e0d08 to your computer and use it in GitHub Desktop.
Save soonraah/62f4bdb2d732ac813fb6f299a39e0d08 to your computer and use it in GitHub Desktop.
package com.example
import org.apache.spark.sql.functions.avg
import org.apache.spark.sql.SparkSession
/**
 * Small Spark job that reads an `employees` table from MySQL over JDBC and prints,
 * for every employee, the difference between their salary and the average salary
 * of their department.
 *
 * The employee DataFrame is consumed twice (once for the per-department average and
 * once for the join), which is why it is marked with `cache()` right after loading.
 */
object IsolationLevelExperiment {

  /**
   * Entry point: builds a local SparkSession, runs the salary-diff computation and
   * prints the result with `show()`.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Prepare SparkSession
    val spark = SparkSession
      .builder()
      .appName("Isolation Level Experiment")
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._

    // Everything that can fail (JDBC read, aggregation, join, show) lives inside the
    // try so that the session is always stopped, even when an exception is thrown.
    try {
      // Read from MySQL
      val dfEmployee = spark
        .read
        .format("jdbc")
        .option("url", "jdbc:mysql://localhost")
        .option("dbtable", "db_name.employees")
        .option("user", "user_name")
        .option("password", "********")
        .option("driver", "com.mysql.cj.jdbc.Driver")
        .load()
        // Mark for caching because dfEmployee feeds both dfAvg and dfResult below.
        // NOTE(review): cache() is lazy and best-effort in Spark, so this presumably
        // does not strictly guarantee a single read of the table -- confirm if the
        // experiment relies on that.
        .cache()

      // Get average salary
      val dfAvg = dfEmployee
        .groupBy($"department_id")
        .agg(avg($"salary").as("avg_salary"))

      // Calculate diff
      // Left outer join keeps every employee row; salary_diff is null for rows
      // without a matching department average.
      val dfResult = dfEmployee
        .as("e")
        .join(
          dfAvg.as("a"),
          $"e.department_id" === $"a.department_id",
          "left_outer"
        )
        .select(
          $"e.id",
          $"e.department_id",
          ($"e.salary" - $"a.avg_salary").as("salary_diff")
        )

      // Output results
      dfResult.show()
    } finally {
      spark.stop()
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment