Skip to content

Instantly share code, notes, and snippets.

@geoHeil
Created July 10, 2017 14:32
Show Gist options
  • Save geoHeil/dc9cfb8eca5c06fca01fc9fc03431b2f to your computer and use it in GitHub Desktop.
Save geoHeil/dc9cfb8eca5c06fca01fc9fc03431b2f to your computer and use it in GitHub Desktop.
Spark: wrong encoder for a non-Product class
import org.apache.spark.SparkConf
import org.apache.spark.sql.{ Encoder, Encoders, SparkSession }
/**
 * Plain (non-case) class wrapping a single integer.
 *
 * `foo` is declared as a `val` so that the constructor argument is kept as a
 * readable member of the instance. As a bare constructor parameter that the
 * class body never references, the value would not be retained at all.
 *
 * @param foo the wrapped integer value
 */
class SomeOtherClass(val foo: Int)
/**
 * Record whose third field is a [[SomeOtherClass]], i.e. a plain class rather
 * than a case class or a primitive.
 *
 * @param a   integer field
 * @param b   string field
 * @param bar field holding the plain-class value
 */
case class FooWithSomeOtherClass(
    a: Int,
    b: String,
    bar: SomeOtherClass
)
/**
 * Record with the same field names as `FooWithSomeOtherClass`, but with `bar`
 * typed as a plain `Int`.
 *
 * @param a   integer field
 * @param b   string field
 * @param bar integer field
 */
case class FooWithoutOtherClass(
    a: Int,
    b: String,
    bar: Int
)
/**
 * Minimal single-field record.
 *
 * @param a integer field
 */
case class Foo(
    a: Int
)
/**
 * Small local Spark application that builds Datasets from the case classes in
 * this file, including one whose field is the non-Product class
 * `SomeOtherClass`.
 *
 * An explicit `main` method is used instead of `extends App`: the Spark
 * documentation warns that subclasses of `scala.App` may not work correctly.
 */
object EncoderSpark {

  /**
   * Runs the demo against a `local[*]` master and always stops the session.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("endoeerTesting")

    val spark = SparkSession
      .builder()
      .config(conf)
      .getOrCreate()

    try {
      import spark.implicits._

      // Spark derives the encoder of a case class by reflecting over its
      // fields; an implicit Encoder[SomeOtherClass] in scope is NOT consulted
      // for the nested field, so the derived product encoder fails at runtime
      // with "No Encoder found". The whole enclosing case class therefore
      // needs its own encoder. It is passed explicitly below so it can never
      // compete with the implicit product encoder from spark.implicits.
      // Trade-off: a Kryo-encoded Dataset has a single binary column.
      val fooWithSomeOtherClassEncoder: Encoder[FooWithSomeOtherClass] =
        Encoders.kryo[FooWithSomeOtherClass]

      val df1 = Seq(Foo(1), Foo(2)).toDF()

      val df2 = spark.createDataset(
        Seq(FooWithSomeOtherClass(1, "one", new SomeOtherClass(4)))
      )(fooWithSomeOtherClassEncoder)

      val df3 = Seq(
        FooWithoutOtherClass(1, "one", 1),
        FooWithoutOtherClass(2, "two", 2)
      ).toDS()
      df3.printSchema()
      df3.show()

      val df4 = df3.map { d =>
        FooWithSomeOtherClass(d.a, d.b, new SomeOtherClass(d.bar))
      }(fooWithSomeOtherClassEncoder)
      df4.printSchema()
      df4.show()
      df4.collect()
    } finally {
      // Release the local Spark context even when one of the steps above fails.
      spark.stop()
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment