Skip to content

Instantly share code, notes, and snippets.

@frgomes
Last active December 18, 2022 03:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save frgomes/f5b09eb068a81c2a685f10fe284f77e6 to your computer and use it in GitHub Desktop.
Save frgomes/f5b09eb068a81c2a685f10fe284f77e6 to your computer and use it in GitHub Desktop.
Spark - Compare schemas, ignoring ``nullable`` setting
package object spark {
  import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructField, StructType}

  /**
   * Adds a nullability-insensitive, column-order-insensitive comparison to [[StructType]].
   *
   * Usage: `schemaA.similar(schemaB)`.
   *
   * @param schema the left-hand schema of the comparison
   */
  implicit class StructTypeExtension(schema: StructType) {

    /** Orders fields by name so that schemas can be compared regardless of column order. */
    implicit val fieldOrdering: Ordering[StructField] = Ordering.by(field => field.name)

    /**
     * Tells whether `other` describes the same columns as this schema.
     *
     * Two schemas are similar when they have the same number of fields and, after
     * sorting both by field name, every pair of fields has the same name (exact,
     * case-sensitive match) and a similar data type. The `nullable` flag of fields,
     * the `containsNull` flag of arrays, the `valueContainsNull` flag of maps and the
     * field metadata are not taken into account, at any nesting depth.
     *
     * @param other the schema to compare against
     * @return `true` when both schemas are similar, `false` otherwise
     */
    def similar(other: StructType): Boolean = sameStruct(schema, other)

    /** Compares two structs field by field, pairing the fields by sorted name. */
    private def sameStruct(left: StructType, right: StructType): Boolean =
      left.fields.length == right.fields.length &&
        left.fields.sorted.zip(right.fields.sorted).forall { case (l, r) =>
          // The name is checked for every field, including struct-typed ones.
          l.name == r.name && sameType(l.dataType, r.dataType)
        }

    /**
     * Compares two data types, descending into structs, arrays and maps so that
     * nullability flags nested inside containers are ignored as well.
     */
    private def sameType(left: DataType, right: DataType): Boolean =
      (left, right) match {
        case (l: StructType, r: StructType) =>
          sameStruct(l, r)
        case (l: ArrayType, r: ArrayType) =>
          sameType(l.elementType, r.elementType)
        case (l: MapType, r: MapType) =>
          sameType(l.keyType, r.keyType) && sameType(l.valueType, r.valueType)
        case _ =>
          // Any other type carries no nested nullability flag to ignore.
          left == right
      }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment