Skip to content

Instantly share code, notes, and snippets.

@lo
Forked from yoyama/Schema2CaseClass.scala
Created July 6, 2017 10:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lo/9ea7871dabfbdc6654547704e06029b3 to your computer and use it in GitHub Desktop.
Save lo/9ea7871dabfbdc6654547704e06029b3 to your computer and use it in GitHub Desktop.
Generate case class from spark DataFrame/Dataset schema.
/**
* Generate Case class from DataFrame.schema
*
* val df:DataFrame = ...
*
* val s2cc = new Schema2CaseClass
* import s2cc.implicits._
*
* println(s2cc.schemaToCaseClass(df.schema, "MyClass"))
*
*/
import org.apache.spark.sql.types._
/**
 * Generates the source text of a Scala case class from a Spark SQL schema.
 *
 * The mapping from a Spark [[DataType]] to a Scala type name is pluggable through an
 * implicit [[TypeConverter]]; a default one is provided by `implicits.defaultTypeConverter`.
 */
class Schema2CaseClass {
  /** Maps a Spark SQL data type to the Scala type name written into the generated source. */
  type TypeConverter = (DataType) => String

  // Words that cannot be used as a bare identifier in generated Scala source
  // (Scala 2 reserved words plus the Scala 3 additions; quoting an extra word is harmless).
  private val reservedWords: Set[String] = Set(
    "abstract", "case", "catch", "class", "def", "do", "else", "enum", "export", "extends",
    "false", "final", "finally", "for", "forSome", "given", "if", "implicit", "import", "lazy",
    "macro", "match", "new", "null", "object", "override", "package", "private", "protected",
    "return", "sealed", "super", "then", "this", "throw", "trait", "try", "true", "type",
    "val", "var", "while", "with", "yield"
  )

  /**
   * Returns `name` unchanged when it is safe to emit as a bare identifier directly
   * followed by `:`, otherwise wraps it in backticks.
   *
   * A trailing underscore is treated as unsafe because `foo_:T` is lexed as the single
   * identifier `foo_:`. Names that themselves contain a backtick cannot be expressed as
   * a Scala identifier at all; they are still wrapped and will not compile.
   */
  private def fieldIdentifier(name: String): String = {
    val isPlain =
      name.matches("[A-Za-z_][A-Za-z0-9_]*") &&
        !name.endsWith("_") &&
        !reservedWords.contains(name)
    if (isPlain) name else s"`$name`"
  }

  /**
   * Renders `schema` as a case class definition named `className`.
   *
   * Each schema field becomes one constructor parameter; nullable fields are wrapped
   * in `Option[...]`.
   *
   * @param schema    schema whose fields become the case class parameters
   * @param className name of the generated case class (emitted verbatim)
   * @param tc        converter used to obtain the Scala type name of each field
   * @return the generated source, starting and ending with a newline
   */
  def schemaToCaseClass(schema: StructType, className: String)(implicit tc: TypeConverter): String = {
    def genField(field: StructField): String = {
      val typeName = tc(field.dataType)
      val declaredType = if (field.nullable) s"Option[$typeName]" else typeName
      s" ${fieldIdentifier(field.name)}:$declaredType"
    }

    val fieldsStr = schema.map(genField).mkString(",\n ")
    // Built with explicit newlines instead of stripMargin: stripMargin would run after
    // interpolation and could also strip a leading '|' that came from the interpolated text.
    s"\ncase class $className (\n $fieldsStr\n)\n"
  }

  object implicits {
    /** Default converter covering the built-in Spark SQL types; unknown types fall back to `String`. */
    implicit val defaultTypeConverter: TypeConverter = (t: DataType) => scalaTypeName(t)

    /** Type name for a collection element, wrapped in `Option` when the element may be null. */
    private def elementTypeName(t: DataType, mayBeNull: Boolean): String = {
      val inner = scalaTypeName(t)
      if (mayBeNull) s"Option[$inner]" else inner
    }

    /**
     * Recursive worker behind `defaultTypeConverter`.
     *
     * Arrays and maps are rendered with their element/key/value types, because a bare
     * `scala.collection.Seq` or `scala.collection.Map` is a type constructor and is not
     * valid as a parameter type in the generated case class.
     */
    private def scalaTypeName(t: DataType): String = t match {
      case _: ByteType      => "Byte"
      case _: ShortType     => "Short"
      case _: IntegerType   => "Int"
      case _: LongType      => "Long"
      case _: FloatType     => "Float"
      case _: DoubleType    => "Double"
      case _: DecimalType   => "java.math.BigDecimal"
      case _: StringType    => "String"
      case _: BinaryType    => "Array[Byte]"
      case _: BooleanType   => "Boolean"
      case _: TimestampType => "java.sql.Timestamp"
      case _: DateType      => "java.sql.Date"
      case ArrayType(elementType, containsNull) =>
        s"scala.collection.Seq[${elementTypeName(elementType, containsNull)}]"
      case MapType(keyType, valueType, valueContainsNull) =>
        s"scala.collection.Map[${scalaTypeName(keyType)}, ${elementTypeName(valueType, valueContainsNull)}]"
      case _: StructType    => "org.apache.spark.sql.Row"
      case _                => "String"
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment