Skip to content

Instantly share code, notes, and snippets.

@zorteran
Created October 31, 2020 19:37
Show Gist options
  • Save zorteran/3bd7fa662a6fcaef9262228be9f8eab7 to your computer and use it in GitHub Desktop.
Save zorteran/3bd7fa662a6fcaef9262228be9f8eab7 to your computer and use it in GitHub Desktop.
package pl.wiadrodanych.demo.extensions
import org.apache.spark.sql._
import org.apache.spark.sql.functions._
import pl.wiadrodanych.demo.base.SparkJob.spark.implicits._
/** Extension methods for DataFrames that hold grocery items.
  *
  * Importing `GroceryDataFrameExtensions._` brings the implicit wrapper into
  * scope, so the methods below can be called directly on a `DataFrame`.
  */
object GroceryDataFrameExtensions {

  /** Implicit wrapper that adds the grocery transformations to a `DataFrame`.
    *
    * @param df the DataFrame every method operates on
    */
  implicit class RichDataFrame(df: DataFrame) {

    /** Groups the rows by `normalized_name` and totals `quantity` per group.
      *
      * Expects the `normalized_name` and `quantity` columns to be present.
      *
      * @return a DataFrame with the grouping column and the total, aliased `sum`
      */
    def sumByNormalizedName: DataFrame =
      df.groupBy("normalized_name")
        .agg(sum(col("quantity")).as("sum"))

    /** Adds a `normalized_name` column holding the lower-cased `name` value.
      *
      * @return the DataFrame with the `normalized_name` column set
      */
    def addNormalizedNameColumn: DataFrame =
      df.withColumn("normalized_name", lower(col("name")))

    /** Keeps only the rows whose `type` column equals `"fruit"`.
      *
      * @return the filtered DataFrame
      */
    def filterFruits: DataFrame =
      df.filter(col("type") === "fruit")
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment