Skip to content

Instantly share code, notes, and snippets.

// Number and rank rows within each winSpec partition.
// row_number(): a unique sequential number per row (ties broken arbitrarily).
val row_num_df = empsalary.select(col("*"), row_number().over(winSpec).as("row_number"))
row_num_df.show()
// dense_rank(): tied values share a rank and no rank numbers are skipped after ties.
val dense_rank_df = empsalary.select(col("*"), dense_rank().over(winSpec).as("dense_rank"))
dense_rank_df.show()
@NeerajBhadani
NeerajBhadani / win_rank_func.scala
Created May 25, 2020 15:06
Window ranking functions
// rank(): tied values share a rank, and ranks after a tie are skipped
// (e.g. 1, 2, 2, 4) — contrast with dense_rank().
val rank_df = empsalary.select(col("*"), rank().over(winSpec).as("rank"))
rank_df.show()
@NeerajBhadani
NeerajBhadani / win_rank_window_spec.scala
Created May 25, 2020 15:05
Create Window Specification for Ranking Function
val winSpec = Window.partitionBy("depName").orderBy("salary".desc)
@NeerajBhadani
NeerajBhadani / win_agg_function.scala
Created May 25, 2020 15:02
Window Aggregate function
// Per-department salary extremes computed as window aggregates, so every
// employee row carries its department's max and min (no GROUP BY collapse).
val agg_sal = empsalary.select(
  col("*"),
  max("salary").over(byDepName).as("max_salary"),
  min("salary").over(byDepName).as("min_salary")
)
// All rows of a department hold identical max/min values, so deduplicate
// down to one row per department before displaying.
agg_sal
  .select("depname", "max_salary", "min_salary")
  .dropDuplicates()
  .show()
@NeerajBhadani
NeerajBhadani / win_window_spec.scala
Created May 25, 2020 14:59
Create Window Specification
// Window specification with partitioning only (no ordering): aggregate
// functions applied over it operate on the entire department partition.
val byDepName = Window.partitionBy("depName")
// Sample record type: one row per employee.
case class Salary(depName: String, empNo: Long, salary: Long)
// Sample data used throughout the window-function examples.
// NOTE(review): this Seq literal is truncated in the visible source — the
// closing parenthesis and (presumably) a .toDF()/.toDS() conversion are
// missing; confirm against the original gist before running.
val empsalary = Seq(
Salary("sales", 1, 5000),
Salary("personnel", 2, 3900),
Salary("sales", 3, 4800),
Salary("sales", 4, 4800),
Salary("personnel", 5, 3500),
Salary("develop", 7, 4200),
Salary("develop", 8, 6000),
@NeerajBhadani
NeerajBhadani / win_import_lib.scala
Created May 25, 2020 14:55
Import required libraries
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._
// Expand every element of the array column "slice_col" into its own row,
// with posexplode also emitting the element's index in a "pos" column.
val arr_explode_df = temp_df.select(col("*"), posexplode(col("slice_col")))
arr_explode_df.show(truncate = false)
// Build "slice_col" from the first two elements of "array_col2"
// (slice is 1-indexed), then discard the source column.
val temp_df = df
  .withColumn("slice_col", slice(col("array_col2"), 1, 2))
  .drop("array_col2")