Skip to content

Instantly share code, notes, and snippets.

@marksparrish
Last active March 18, 2022 17:32
Show Gist options
  • Save marksparrish/19a8c15e24ccd845314a0445fb84b63b to your computer and use it in GitHub Desktop.
Save marksparrish/19a8c15e24ccd845314a0445fb84b63b to your computer and use it in GitHub Desktop.
def data_contract():
    """Return the mapping of canonical column names to accepted source headers.

    Each key is a canonical column name; each value is the list of header
    spellings that may supply that column in an incoming frame. A new dict
    is built on every call, so callers may mutate the result freely.

    Returns:
        dict: canonical column name -> list of accepted header strings.
    """
    return {
        'good_column_header': ['possible_header_1', 'possible_header_2'],
        'another_good_column': ['only_one_possible_header'],
    }
def _validate_data_contract(df, contract=None):
    """Build a frame holding only the contract's columns, renamed and stripped.

    For every contract key, the accepted headers are looked up in
    ``df.columns``. The matching source column is copied into the result
    under the key's name with ``.str.strip()`` applied to its values.

    Args:
        df: Source DataFrame whose column labels are checked against the
            contract. The matched columns must support the ``.str``
            accessor (i.e. hold string values).
        contract: Optional mapping of output column name -> list of accepted
            source headers. When omitted, ``data_contract()`` is used.

    Returns:
        A new DataFrame with one column per contract key, in contract order.

    Raises:
        ValueError: If none of a key's accepted headers is present in ``df``.
    """
    if contract is None:
        contract = data_contract()

    vdf = pd.DataFrame(data=None)
    df_columns = df.columns
    for key, possible_headers in contract.items():
        # Collect matches explicitly instead of using the header itself as a
        # truthiness flag, so falsy labels such as "" or 0 still count.
        matches = [heading for heading in possible_headers if heading in df_columns]
        if not matches:
            raise ValueError(f"Broken Contract!@! for {key, possible_headers, df_columns}")
        # When several accepted headers are present, the last one listed wins.
        vdf[key] = df[matches[-1]].str.strip()
    return vdf
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment