Skip to content

Instantly share code, notes, and snippets.

@talagluck
Created January 22, 2021 15:31
Show Gist options
  • Save talagluck/724336fbc4b74f5120669d27baee847c to your computer and use it in GitHub Desktop.
Save talagluck/724336fbc4b74f5120669d27baee847c to your computer and use it in GitHub Desktop.
ge_column_mapping.py
# The mapping method
def map_column_names(expectation_suite, mapping_dict):
    """Rename the ``column`` domain kwarg of the expectations in a suite.

    Each expectation in ``expectation_suite.expectations`` whose domain kwargs
    contain a ``'column'`` key has that column name looked up in
    ``mapping_dict``. When a mapping exists the expectation is patched in
    place with ``expectation.patch('replace', '/column', target_col)``;
    otherwise a message is printed and the expectation is left unchanged.
    Expectations whose domain kwargs have no ``'column'`` key are skipped
    silently, and any other domain kwargs are never touched.

    Args:
        expectation_suite: Object exposing an ``expectations`` iterable whose
            items provide ``get_domain_kwargs()`` and
            ``patch(op, path, value)``.
        mapping_dict: Mapping of source column name to target column name.

    Returns:
        The same ``expectation_suite`` object that was passed in; its
        expectations are modified in place rather than copied.
    """
    for expectation in expectation_suite.expectations:
        # Fetch the domain kwargs once per expectation instead of twice.
        domain_kwargs = expectation.get_domain_kwargs()
        if 'column' not in domain_kwargs:
            # No single-column domain on this expectation: nothing to rename.
            continue
        source_col = domain_kwargs['column']
        if source_col in mapping_dict:
            target_col = mapping_dict[source_col]
            print('column:', source_col, ' mapping to:', target_col)
            expectation.patch('replace', '/column', target_col)
        else:
            print('column:', source_col, 'not in mapping dict')
    return expectation_suite
# Testing it out with an expectation suite that has a mix of table level and column level
# expectations, and at least one column expectation that doesn't have a mapping.
# This will modify my_suite in place, and will NOT modify the suite on the batch, as
# batch.get_expectation_suite() makes a deepcopy.
my_suite = batch.get_expectation_suite()
my_mapping_dict = {'vendor_id': 'mapped_vendor_id', 'pickup_datetime': 'mapped_pickup_datetime'}
# map_column_names returns the suite it was given, so mapped_suite and my_suite
# refer to the same (now remapped) object. Binding the result is what makes
# mapped_suite available to the loop below.
mapped_suite = map_column_names(my_suite, my_mapping_dict)
# Checking the resulting suite
for e in mapped_suite.expectations:
    print(e)
# You can also use UserConfigurableProfiler._display_suite_by_column(suite) for a nicer view of the suite by column
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment