talagluck/ge_column_mapping.py

## ge_column_mapping.py
# The mapping method
def map_column_names(expectation_suite, mapping_dict):
    """Rename the 'column' domain kwarg of a suite's expectations.

    Every expectation in ``expectation_suite.expectations`` is inspected.
    Expectations whose domain kwargs contain no 'column' entry are left
    alone. For the others, the current column name is looked up in
    ``mapping_dict``: when present, the expectation is patched with
    ``patch('replace', '/column', <new name>)``; when absent, a notice is
    printed and the expectation is left unchanged.

    Args:
        expectation_suite: Object exposing an iterable ``expectations``
            attribute whose items provide ``get_domain_kwargs()`` and
            ``patch(op, path, value)``.
        mapping_dict: Mapping from existing column name to replacement name.

    Returns:
        The same ``expectation_suite`` object that was passed in.
    """
    for config in expectation_suite.expectations:
        # No 'column' domain kwarg means there is nothing to rename here.
        if 'column' not in config.get_domain_kwargs():
            continue

        current_name = config.get_domain_kwargs()['column']
        if current_name not in mapping_dict:
            print('column:', current_name, 'not in mapping dict')
            continue

        new_name = mapping_dict[current_name]
        print('column:', current_name, ' mapping to:', new_name)
        config.patch('replace', '/column', new_name)

    return expectation_suite

# Testing it out with an expectation suite that has a mix of table level and column level
# expectations, and at least one column expectation that doesn't have a mapping
# This will modify my_suite in place, and will NOT modify the suite on the batch, as
# batch.get_expectation_suite() makes a deepcopy

my_suite = batch.get_expectation_suite()
my_mapping_dict = {'vendor_id': 'mapped_vendor_id', 'pickup_datetime': 'mapped_pickup_datetime'}
# map_column_names returns the very suite object it was given, so mapped_suite
# and my_suite refer to the same object. Binding the return value gives the
# check below a defined name (it previously raised NameError).
mapped_suite = map_column_names(my_suite, my_mapping_dict)

# Checking the resulting suite
for e in mapped_suite.expectations:
    print(e)

# You can also use UserConfigurableProfiler._display_suite_by_column(suite) for a nicer view of the suite by column
	# The mapping method
	def map_column_names(expectation_suite, mapping_dict):
	    """Rename the 'column' domain kwarg of a suite's expectations.

	    Every expectation in ``expectation_suite.expectations`` is inspected.
	    Expectations whose domain kwargs contain no 'column' entry are left
	    alone. For the others, the current column name is looked up in
	    ``mapping_dict``: when present, the expectation is patched with
	    ``patch('replace', '/column', <new name>)``; when absent, a notice is
	    printed and the expectation is left unchanged.

	    Args:
	        expectation_suite: Object exposing an iterable ``expectations``
	            attribute whose items provide ``get_domain_kwargs()`` and
	            ``patch(op, path, value)``.
	        mapping_dict: Mapping from existing column name to replacement name.

	    Returns:
	        The same ``expectation_suite`` object that was passed in.
	    """
	    for expectation in expectation_suite.expectations:
	        # Fetch the domain kwargs once per expectation and reuse them.
	        domain_kwargs = expectation.get_domain_kwargs()
	        if 'column' in domain_kwargs:
	            source_col = domain_kwargs['column']
	            if source_col in mapping_dict:
	                target_col = mapping_dict[source_col]
	                print('column:', source_col, ' mapping to:', target_col)
	                expectation.patch('replace', '/column', target_col)
	            else:
	                print('column:', source_col, 'not in mapping dict')
	    return expectation_suite

	# Testing it out with an expectation suite that has a mix of table level and column level
	# expectations, and at least one column expectation that doesn't have a mapping
	# This will modify my_suite in place, and will NOT modify the suite on the batch, as
	# batch.get_expectation_suite() makes a deepcopy

	my_suite = batch.get_expectation_suite()
	my_mapping_dict = {'vendor_id': 'mapped_vendor_id', 'pickup_datetime': 'mapped_pickup_datetime'}
	# map_column_names returns the very suite object it was given, so mapped_suite
	# and my_suite refer to the same object. Binding the return value gives the
	# check below a defined name (it previously raised NameError).
	mapped_suite = map_column_names(my_suite, my_mapping_dict)

	# Checking the resulting suite
	for e in mapped_suite.expectations:
	    print(e)

	# You can also use UserConfigurableProfiler._display_suite_by_column(suite) for a nicer view of the suite by column