Created
November 27, 2016 10:04
-
-
Save geoHeil/34cd1bd8ff057d05c75443fc80b126d8 to your computer and use it in GitHub Desktop.
spark null exception
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.log4j.{Level, Logger} | |
import org.apache.spark.SparkConf | |
import org.apache.spark.sql.functions._ | |
import org.apache.spark.sql.{Dataset, SparkSession} | |
case class FooBar(city: String, postcode: String) | |
object Foo extends App { | |
Logger.getLogger("org").setLevel(Level.WARN) | |
val conf: SparkConf = new SparkConf() | |
.setAppName("exception") | |
.setMaster("local[*]") | |
.set("spark.executor.memory", "2G") | |
.set("spark.executor.cores", "4") | |
.set("spark.default.parallelism", "4") | |
.set("spark.driver.memory", "1G") | |
.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") | |
val spark: SparkSession = SparkSession | |
.builder() | |
.config(conf) | |
// .enableHiveSupport() | |
.getOrCreate() | |
import spark.implicits._ | |
implicit val e = org.apache.spark.sql.Encoders.DATE | |
val dates = Seq( | |
("2016-01-01", "ABC"), | |
("2016-01-02", "ABC"), | |
("2016-01-03", "POL"), | |
("2016-01-04", "ABC"), | |
("2016-01-05", "POL"), | |
("2016-01-06", "ABC"), | |
("2016-01-08", "ABC"), | |
("2016-01-09", "POL"), | |
("2016-01-07", "POL"), | |
("2016-01-10", "ABC") | |
).toDF("dates", "ISO") | |
.withColumn("dates", 'dates.cast("Date")) | |
dates.show | |
fit(dates) | |
spark.stop | |
def fit(df: Dataset[_]): Any = { | |
import df.sparkSession.implicits._ | |
val dfFiltered = df | |
.filter($"ISO" === "ABC") | |
val someDate = dfFiltered | |
.filter(_ != isnull('dates)) | |
.select(min('dates)).first.toSeq.head | |
println(someDate) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment