Skip to content

Instantly share code, notes, and snippets.

@igreenfield
Created April 12, 2018 13:09
Show Gist options
  • Save igreenfield/584c3336f03ba7d63e9026774eaf5e23 to your computer and use it in GitHub Desktop.
Save igreenfield/584c3336f03ba7d63e9026774eaf5e23 to your computer and use it in GitHub Desktop.
sample code for question
package com.execution
import java.lang.Thread.UncaughtExceptionHandler
import java.util.concurrent.TimeUnit
import com.typesafe.scalalogging.StrictLogging
import org.apache.spark.sql.SparkSession
/**
 * Stand-alone Spark sample: derives two columns from very large chained `when` expressions
 * (10,000 branches each) on top of a 20-row dataset and shows the resulting frame.
 */
object RunExecutionSample extends StrictLogging {

  /**
   * Starts a local Spark session, builds the two chained conditional columns, shows the
   * frame and prints a separator banner. The session is stopped before returning.
   *
   * @param args command-line arguments; not read by this sample
   */
  def main(args: Array[String]): Unit = {
    // Master is `local[N]` with N = 2 x the number of available processors.
    val spark = SparkSession
      .builder()
      .master(s"local[${Runtime.getRuntime().availableProcessors() * 2}]")
      .config("spark.default.parallelism", "32")
      .appName("RunExecution")
      .getOrCreate()

    try {
      import spark.implicits._
      import org.apache.spark.sql.functions._

      // 20 rows of (index, "sss-<index>"), with the tuple columns renamed.
      val df = spark
        .createDataset((1 to 20).map(index => index -> s"sss-$index"))
        .withColumnRenamed("_1", "index")
        .withColumnRenamed("_2", "value")

      // The expression size is the point of the sample, so it is kept as-is.
      val branches = 1 to 10000

      // First expression: one branch per index, matching on the suffix of "value".
      val newValue = branches
        .foldLeft(when(df.col("value") <=> lit("someLit"), "value1")) { (exp, index) =>
          exp.when(col("value").endsWith(index.toString), s"value${index * 3}")
        }
        .otherwise(null)

      // Second expression: null-safe comparison of the first derived column against each
      // value the first expression can produce. The column name "calculted" (sic) is a
      // runtime string used consistently below, so its spelling is left untouched.
      val newValue2 = branches
        .foldLeft(when(col("calculted") <=> lit("otherLit"), false)) { (exp, index) =>
          exp.when(col("calculted") <=> lit(s"value${index * 3}"), true)
        }
        .otherwise(null)

      val withNewData = df
        .withColumn("calculted", newValue)
        .withColumn("calculted1", newValue2)

      withNewData.show()

      val banner = "========================================================"
      (1 to 3).foreach(_ => println(banner))
    } finally {
      // Release the session even when building or showing the frame throws.
      spark.stop()
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment