# Clean a pandas DataFrame using pandera validation. The returned DataFrame
# is the result of dropping the records that fail validation.
import pandas as pd
import pandera as pa
def clean_dataframe_with_schema(dataframe, schema):
    """Validate ``dataframe`` against ``schema`` and drop the rows that fail.

    Args:
        dataframe: The pandas DataFrame to validate.
        schema: A pandera schema, i.e. an object whose ``validate`` method
            accepts the frame and a ``lazy`` keyword argument.

    Returns:
        Whatever ``schema.validate`` returns when validation succeeds.
        Otherwise a new DataFrame equal to ``dataframe`` minus the rows whose
        index labels appear in the error's ``failure_cases`` table. The
        fallback frame is returned as-is; it is not validated a second time.

    Note:
        Failure cases without a row label (presumably schema-level problems
        such as a missing column or a wrong dtype -- confirm against the
        pandera version in use) cannot be fixed by dropping rows. They are
        skipped here, so the returned frame may still violate the schema in
        that situation.
    """
    try:
        # lazy=True asks pandera to run every check and report all failures
        # together; in eager mode only the first failing check is reported,
        # so rows failing any later check would be kept.
        return schema.validate(dataframe, lazy=True)
    except (pa.errors.SchemaErrors, pa.errors.SchemaError) as err:
        # Several checks can flag the same row, and some failures carry no
        # row label at all: de-duplicate and discard nulls so that ``drop``
        # only receives labels that can exist in the frame's index.
        failing_labels = err.failure_cases["index"].dropna().unique().tolist()
        return dataframe.drop(labels=failing_labels)
