Skip to content

Instantly share code, notes, and snippets.

Last active July 17, 2023 10:08
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save OatmealLick/c8625d559f03135bb00591e69f72ae68 to your computer and use it in GitHub Desktop.
Validate whether Contract Custom YAML Schema matches BigQuery Schema
def _compare_yaml_and_bq_schemas(
    expected_fields: dict, actual_schema: list[SchemaField]
) -> tuple[bool, list[str]]:
    """Check every field declared in the YAML contract against a BigQuery schema.

    Args:
        expected_fields: The YAML parsed dict. Its ``'fields'`` entry is
            iterated; each item describes one expected field.
            See `schema.yaml` for example.
        actual_schema: The list of ``SchemaField`` objects, from
            google-cloud-bigquery.

    Returns:
        A ``(is_valid, messages)`` tuple. ``is_valid`` is ``False`` as soon as
        one expected field fails validation; ``messages`` collects the
        messages reported for all failing fields.
    """
    # Index the actual columns by name: _is_field_valid looks fields up by
    # the expected field's name and then subscripts this mapping.
    actual_fields = {field.name: field for field in actual_schema}
    all_messages = []
    aggregated_result = True
    for field in expected_fields['fields']:
        result, messages = _is_field_valid(field, actual_fields)
        if not result:
            aggregated_result = False
        # Keep every failure explanation so the caller can report all of
        # them instead of only learning that "something" did not match.
        all_messages.extend(messages)
    return aggregated_result, all_messages
def _is_field_valid(expected_field: dict, actual_fields: dict) -> tuple[bool, list[str]]:
    """Validate one expected YAML field against the actual BigQuery fields.

    Args:
        expected_field: One entry of the YAML ``fields`` list. Reads ``name``,
            ``type`` (either a type-name string or a nested dict that is
            itself compared as a schema) and the optional ``repeated`` flag.
        actual_fields: Mapping of column name to the actual field object.
            ``mode``, ``field_type`` and ``fields`` are read from that object.

    Returns:
        ``(True, [])`` when the field matches, otherwise ``(False, messages)``
        where ``messages`` explains the mismatch.
    """
    name = expected_field['name']

    # check if column exists by name
    if name not in actual_fields:
        message = f"Field {name} not found.\nLooked in {actual_fields}"
        return False, [message]
    actual_field = actual_fields[name]

    # check if array
    # YAML 1.1 loaders (e.g. PyYAML) parse an unquoted `yes` as the boolean
    # True, so accept both spellings; otherwise the check is silently skipped.
    repeated = expected_field.get("repeated")
    if repeated is True or repeated == "yes":
        if actual_field.mode != "REPEATED":
            message = f"Field {name} expected to be array but {actual_field} doesn't seem to be one."
            return False, [message]

    # check type
    expected_type = expected_field['type']
    if isinstance(expected_type, dict):
        # Nested record: compare the nested YAML schema with the sub-fields.
        return _compare_yaml_and_bq_schemas(expected_type, actual_field.fields)
    if expected_type.upper() != actual_field.field_type.upper():
        message = f"Field {name} expected to be of type {expected_type}. Field: {actual_field} does not match that expectation."
        return False, [message]
    return True, []
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment