Last active
July 17, 2023 10:08
-
-
Save OatmealLick/c8625d559f03135bb00591e69f72ae68 to your computer and use it in GitHub Desktop.
Validate whether Contract Custom YAML Schema matches BigQuery Schema
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def _compare_yaml_and_bq_schemas(expected_fields: dict, actual_schema: List[SchemaField]) -> tuple[bool, List[str]]:
    """
    Check every field expected by the YAML contract against the actual schema.

    Expected fields should be the YAML parsed dict.
    See `schema.yaml` for example.
    Actual schema should be the List of SchemaField, from google-cloud-bigquery.

    Args:
        expected_fields: Mapping with a ``fields`` key holding the list of
            expected field definitions. A ``fields`` key with no entries
            (which YAML parses to ``None``) is treated as an empty list.
        actual_schema: Actual fields; each one is indexed by its ``name``.

    Returns:
        ``(is_valid, messages)``. ``is_valid`` is ``False`` as soon as any
        expected field fails validation; ``messages`` collects the error
        messages of all failing fields, in the order they were checked.

    Raises:
        KeyError: If ``expected_fields`` has no ``fields`` key.
    """
    actual_fields = {field.name: field for field in actual_schema}
    all_messages: List[str] = []
    aggregated_result = True
    # `fields:` left empty in YAML yields None rather than []; iterate safely.
    for field in expected_fields['fields'] or []:
        result, messages = _is_field_valid(field, actual_fields)
        # Keep going after a failure so that every problem gets reported.
        all_messages.extend(messages)
        if not result:
            aggregated_result = False
    return aggregated_result, all_messages
def _is_field_valid(expected_field: dict, actual_fields: dict) -> tuple[bool, List[str]]:
    """
    Validate a single expected (YAML) field against the actual fields.

    Checks are applied in order and the first failure is returned:
    the field exists by name, it has mode ``REPEATED`` when the contract
    marks it as repeated, and its type matches (case-insensitively).
    A dict-valued ``type`` is treated as a nested schema and compared
    recursively against the actual field's sub-fields.

    Args:
        expected_field: Mapping with ``name`` and ``type`` keys and an
            optional ``repeated`` key. ``repeated`` counts as set when it is
            the string ``"yes"`` or boolean ``True``.
        actual_fields: Mapping of field name to the actual field object;
            its ``mode``, ``field_type`` and ``fields`` attributes are read.

    Returns:
        ``(is_valid, messages)``. On failure ``messages`` holds the error
        message(s); on success it is empty. Each failure is also logged via
        ``logging.error``.
    """
    name = expected_field['name']

    # check if column exists by name
    if name not in actual_fields:
        message = f"Field {expected_field['name']} not found.\nLooked in {actual_fields}"
        logging.error(message)
        return False, [message]
    actual_field = actual_fields[name]

    # check if array
    # YAML 1.1 loaders (e.g. PyYAML) parse an unquoted `yes` as boolean True,
    # so accept both spellings; comparing only against "yes" would silently
    # skip this check for `repeated: yes`.
    repeated = expected_field.get("repeated")
    expects_array = repeated is True or repeated == "yes"
    if expects_array and actual_field.mode != "REPEATED":
        message = f"Field {expected_field['name']} expected to be array but {actual_field} doesn't seem to be one."
        logging.error(message)
        return False, [message]

    # check type
    expected_type = expected_field['type']
    if isinstance(expected_type, dict):
        # Nested record: validate the sub-schema against the actual sub-fields.
        return _compare_yaml_and_bq_schemas(expected_type, actual_field.fields)

    if expected_type.upper() != actual_field.field_type.upper():
        message = f"Field {expected_field['name']} expected to be of type {expected_field['type']}. Field: {actual_field} does not match that expectation."
        logging.error(message)
        return False, [message]
    return True, []
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment