Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
renaming Array struct columns
/** Source shape of one entry in the `elements` array column.
  *
  * The field names are deliberately snake_case: they become the struct field
  * names in the DataFrame schema.
  */
case class ElementCaseClass(
  contrast_ratio: Double,
  error_text: String,
  error_x_position: Long,
  error_x_size: Long
)
// Single-row DataFrame whose only column, "elements", is an array of ElementCaseClass structs.
val dataset = {
  val sampleElements = Array(
    ElementCaseClass(contrast_ratio = 1.0d, error_text = "error1", error_x_position = 1L, error_x_size = 2L),
    ElementCaseClass(contrast_ratio = 2.0d, error_text = "error2", error_x_position = 100L, error_x_size = 101L),
    ElementCaseClass(contrast_ratio = 3.0d, error_text = "error3", error_x_position = 1000L, error_x_size = 1001L)
  )
  // One Seq entry == one row; the whole array is that row's single column value.
  Seq(sampleElements).toDF("elements")
}
dataset.printSchema()
/*
root
|-- elements: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- contrast_ratio: double (nullable = false)
| | |-- error_text: string (nullable = true)
| | |-- error_x_position: long (nullable = false)
| | |-- error_x_size: long (nullable = false)
*/
// As you can see, 'element' is printed instead of 'ElementCaseClass'
/** Target shape for the array elements: same field order and types as
  * `ElementCaseClass`, but with the shorter names the structs should be renamed to.
  */
case class Results(
  ratio: Double,
  text: String,
  x: Long,
  size: Long
)
// Derive the target struct schema from the Results case class. Encoders.product is the
// public API for this and already returns a StructType, so there is no need to reach
// into catalyst's ScalaReflection or to downcast with asInstanceOf.
val resultsSchema: StructType = org.apache.spark.sql.Encoders.product[Results].schema
// Casting the array<struct> column to an array of the target struct keeps the values and
// only swaps the field names (see the schema and JSON output below). Built once, reused.
val renamedElements = dataset.select($"elements".cast(ArrayType(resultsSchema)))
renamedElements.printSchema()
/*
root
|-- elements: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- ratio: double (nullable = false)
| | |-- text: string (nullable = true)
| | |-- x: long (nullable = false)
| | |-- size: long (nullable = false)
*/
// The struct fields inside the array are renamed
renamedElements.write.mode("overwrite").json("/tmp/test-array-json")
/*
{"elements":[{"ratio":1.0,"text":"error1","x":1,"size":2},{"ratio":2.0,"text":"error2","x":100,"size":101},{"ratio":3.0,"text":"error3","x":1000,"size":1001}]}
*/
// The JSON output carries only the struct's field names; the name of the underlying struct type (the case class) never appears
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment