Skip to content

Instantly share code, notes, and snippets.

@zuynew
Created January 12, 2020 15:17
/**
 * Returns a copy of `s0` in which every field name has been passed through
 * `transformSchemaFieldName0`.
 *
 * Renaming is applied recursively: struct-typed fields have their own fields
 * renamed, and array-typed fields are descended into through any number of
 * nested array levels until a struct element type (renamed recursively) or a
 * non-array, non-struct element type (left unchanged) is reached. Other data
 * types, including maps, are not descended into.
 *
 * All attributes other than `name` and `dataType` are carried over unchanged
 * because every node is rebuilt with `copy`.
 *
 * @param s0                        schema whose field names should be rewritten
 * @param transformSchemaFieldName0 function mapping an original field name to its new name
 * @return a schema with the same shape as `s0` and rewritten field names
 */
def transformSchemaFieldNames(
    s0: StructType,
    transformSchemaFieldName0: (String) => String
): StructType = {
  // Walks down nested array levels; only a struct found at the bottom is rewritten,
  // every other element type is returned as-is.
  def renameArray(array: ArrayType): ArrayType =
    array.elementType match {
      case struct: StructType =>
        array.copy(
          elementType = transformSchemaFieldNames(struct, transformSchemaFieldName0)
        )
      case inner: ArrayType =>
        array.copy(elementType = renameArray(inner))
      case _ =>
        array
    }

  def loop(f0: StructField): StructField = {
    // The field itself is always renamed, whatever its data type is.
    val renamed = f0.copy(name = transformSchemaFieldName0(f0.name))
    f0.dataType match {
      case struct: StructType =>
        renamed.copy(
          dataType = transformSchemaFieldNames(struct, transformSchemaFieldName0)
        )
      case array: ArrayType =>
        renamed.copy(dataType = renameArray(array))
      case _ =>
        renamed
    }
  }

  s0.copy(fields = s0.fields.map(loop))
}
/**
 * Renames the top-level columns of `df0` with
 * `transformNames.transformDatasetFieldName` and then casts every top-level
 * column that is a struct, or an array whose elements are structs, to the
 * corresponding type taken from the renamed schema (as produced by
 * `transformSchemaFieldNames`).
 *
 * Columns of any other type are only renamed, not cast.
 *
 * @param df0    data frame to rename and cast
 * @param schema not read by this implementation; retained so existing callers keep compiling
 * @return the renamed data frame with its nested columns cast to the renamed types
 */
def castNestedDataframe(df0: DataFrame, schema: StructType): DataFrame = {
  val renamedSchema = transformSchemaFieldNames(
    df0.schema,
    transformNames.transformDatasetFieldName
  )

  // Only top-level fields are inspected: (column name, target type) for each
  // struct column and each array-of-struct column, in schema order.
  val nestedCasts: List[(String, DataType)] =
    renamedSchema.toList.flatMap { field =>
      field.dataType match {
        case structType: StructType =>
          List((field.name, structType))
        case arrayType: ArrayType =>
          arrayType.elementType match {
            case _: StructType => List((field.name, arrayType))
            case _             => Nil
          }
        case _ =>
          Nil
      }
    }

  val renamedDf =
    df0.toDF(df0.columns.map(transformNames.transformDatasetFieldName): _*)

  nestedCasts.foldLeft(renamedDf) {
    case (current, (columnName, targetType)) =>
      current.withColumn(columnName, col(columnName).cast(targetType))
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment