Skip to content

Instantly share code, notes, and snippets.

@ebuildy
Last active February 20, 2020 17:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ebuildy/736ddb7160b587f6405c88c556287a17 to your computer and use it in GitHub Desktop.
Save ebuildy/736ddb7160b587f6405c88c556287a17 to your computer and use it in GitHub Desktop.
Flatten an Apache Spark DataFrame
import org.apache.spark.sql.Column
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.functions.col
/**
 * Recursively expands a schema into one Column per leaf (non-struct) field.
 *
 * Struct-typed fields are descended into; a field of any other data type is emitted as a
 * single column referenced by its dot-separated path from the root, in schema order.
 *
 * Every path segment is wrapped in backticks so that a field whose own name contains a dot
 * is addressed as one field instead of being split into a nested path. Field names that
 * themselves contain a backtick are not escaped here.
 *
 * @param schema the struct whose fields are expanded
 * @param prefix path of the enclosing struct, or null when called on the top-level schema
 * @return the columns for all leaf fields
 */
def flattenSchema(schema: StructType, prefix: String = null): Array[Column] = {
  schema.fields.flatMap { f =>
    // Quote the segment: an unquoted "." inside a field name would be read as a path separator.
    val quoted = s"`${f.name}`"
    // Option(...) turns the null default into None without changing the public signature.
    val colName = Option(prefix).fold(quoted)(parent => s"$parent.$quoted")
    f.dataType match {
      case st: StructType => flattenSchema(st, colName)
      case _ => Array(col(colName))
    }
  }
}
// One Column per leaf field of df's schema.
val flattenedSchema = flattenSchema(df.schema)
// Re-select every leaf under an alias in which the dots of its path become underscores.
// NOTE(review): this treats Column.toString as the column's dotted path — confirm for the
// Spark version in use.
val renamedCols = flattenedSchema.map { leaf =>
  val path = leaf.toString()
  col(path).as(path.replace(".", "_"))
}
val flatDF = df.select(renamedCols: _*)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment