Skip to content

Instantly share code, notes, and snippets.

@geoHeil
Created November 27, 2016 10:04
Show Gist options
  • Save geoHeil/34cd1bd8ff057d05c75443fc80b126d8 to your computer and use it in GitHub Desktop.
Save geoHeil/34cd1bd8ff057d05c75443fc80b126d8 to your computer and use it in GitHub Desktop.
spark null exception
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.sql.functions._
import org.apache.spark.sql.{Dataset, SparkSession}
/**
 * Simple immutable record holding two string fields.
 *
 * @param city     value stored in the `city` field
 * @param postcode value stored in the `postcode` field (kept as text, not a number)
 */
case class FooBar(
  city: String,
  postcode: String
)
/**
 * Small Spark job: builds an in-memory (dates, ISO) DataFrame, prints it, and then
 * prints the earliest non-null date among the rows whose ISO code is "ABC".
 *
 * The entry point is an explicit `main` method rather than `extends App`, because
 * Spark's documentation advises against `scala.App` for applications.
 */
object Foo {

  /**
   * Program entry point: configures a local Spark session, builds the sample data,
   * runs [[fit]] on it, and always stops the session afterwards.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Silence everything below WARN for loggers under the "org" namespace.
    Logger.getLogger("org").setLevel(Level.WARN)

    val conf: SparkConf = new SparkConf()
      .setAppName("exception")
      .setMaster("local[*]")
      .set("spark.executor.memory", "2G")
      .set("spark.executor.cores", "4")
      .set("spark.default.parallelism", "4")
      .set("spark.driver.memory", "1G")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

    val spark: SparkSession = SparkSession
      .builder()
      .config(conf)
      // .enableHiveSupport()
      .getOrCreate()

    try {
      // Needed for `.toDF` on a local Seq of tuples.
      import spark.implicits._

      val dates = Seq(
        ("2016-01-01", "ABC"),
        ("2016-01-02", "ABC"),
        ("2016-01-03", "POL"),
        ("2016-01-04", "ABC"),
        ("2016-01-05", "POL"),
        ("2016-01-06", "ABC"),
        ("2016-01-08", "ABC"),
        ("2016-01-09", "POL"),
        ("2016-01-07", "POL"),
        ("2016-01-10", "ABC")
      ).toDF("dates", "ISO")
        // Replace the string column with a proper date-typed column of the same name.
        .withColumn("dates", col("dates").cast("Date"))

      dates.show()
      fit(dates)
    } finally {
      // Release the session even when building or processing the data fails.
      spark.stop()
    }
  }

  /**
   * Prints the smallest non-null value of the `dates` column among the rows whose
   * `ISO` column equals "ABC".
   *
   * @param df dataset that must expose an `ISO` column and a `dates` column
   * @return the result of `println`, i.e. unit; the declared type stays `Any` for
   *         compatibility with existing callers
   */
  def fit(df: Dataset[_]): Any = {
    val dfFiltered = df.filter(col("ISO") === "ABC")

    // The null check is a Column predicate evaluated by Spark itself. A Scala lambda
    // that compares the row object with a Column can never exclude null dates, and
    // building Columns inside such a lambda depends on driver-only session state.
    val someDate = dfFiltered
      .filter(col("dates").isNotNull)
      .select(min(col("dates")))
      .first()
      .get(0)

    println(someDate)
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment