Created
January 22, 2021 15:31
-
-
Save talagluck/724336fbc4b74f5120669d27baee847c to your computer and use it in GitHub Desktop.
ge_column_mapping.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The mapping method
def map_column_names(expectation_suite, mapping_dict):
    """Rename the ``column`` domain kwarg of the expectations in a suite.

    Iterates ``expectation_suite.expectations``. For each expectation whose
    domain kwargs contain a ``'column'`` key, the column name is looked up in
    ``mapping_dict``; when found, the expectation is patched so that its
    ``column`` becomes the mapped name. Expectations without a ``'column'``
    domain kwarg are skipped silently, and column expectations whose column
    has no mapping are reported and left unchanged.

    Args:
        expectation_suite: Object exposing an ``expectations`` iterable. Each
            item must provide ``get_domain_kwargs()`` and
            ``patch(op, path, value)``.
        mapping_dict: Mapping of source column name to target column name.

    Returns:
        The same ``expectation_suite`` object that was passed in (its
        expectations are updated through their own ``patch`` method).
    """
    for expectation in expectation_suite.expectations:
        # Fetch the domain kwargs once per expectation rather than once for
        # the membership test and again for the lookup.
        domain_kwargs = expectation.get_domain_kwargs()
        if 'column' not in domain_kwargs:
            # No single-column domain (e.g. a table-level expectation).
            continue

        source_col = domain_kwargs['column']
        if source_col in mapping_dict:
            target_col = mapping_dict[source_col]
            print('column:', source_col, ' mapping to:', target_col)
            expectation.patch('replace', '/column', target_col)
        else:
            print('column:', source_col, 'not in mapping dict')
    return expectation_suite
# Testing it out with an expectation suite that has a mix of table level and column level
# expectations, and at least one column expectation that doesn't have a mapping
# This will modify my_suite in place, and will NOT modify the suite on the batch, as
# batch.get_expectation_suite() makes a deepcopy
my_suite = batch.get_expectation_suite()
my_mapping_dict = {'vendor_id': 'mapped_vendor_id', 'pickup_datetime': 'mapped_pickup_datetime'}
# map_column_names returns the suite it was given; bind the result so the
# check below has a defined name to iterate over.
mapped_suite = map_column_names(my_suite, my_mapping_dict)
# Checking the resulting suite
for e in mapped_suite.expectations:
    print(e)
# You can also use UserConfigurableProfiler._display_suite_by_column(suite) for a nicer view of the suite by column
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment