Created
October 13, 2020 16:06
-
-
Save spbail/6723728aecf0295bf50622d07e09840e to your computer and use it in GitHub Desktop.
Code snippet that demonstrates how to use Great Expectations to validate Excel files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime
import pandas as pd
import great_expectations as ge
import great_expectations.jupyter_ux
from great_expectations.data_context.types.resource_identifiers import (
    ValidationResultIdentifier,
)

# Prerequisites:
#   * Run `great_expectations init` to create a context that can be loaded here.
#   * Set up a "dummy" datasource called `data__dir` pointing at a directory
#     using `great_expectations datasource new`; the batch kwargs below refer
#     to that datasource by name.
context = ge.data_context.DataContext()

# Load the data from Excel.
# header=1 takes the column labels from the second row of the sheet
# (rows are 0-indexed), not from the first one.
excel_df = pd.read_excel("my_excel_file_path", header=1)

# Create a new batch from the in-memory DataFrame and start adding expectations.
# NOTE(review): the suite "my_suite" presumably has to exist in the context
# already -- confirm against the Great Expectations version in use.
batch_kwargs = {
    "dataset": excel_df,
    "datasource": "data__dir",
}
batch = context.get_batch(
    batch_kwargs=batch_kwargs,
    expectation_suite_name="my_suite",
)

# Peek at the first rows to check that the sheet was parsed as expected.
batch.head()

# Each expect_* call validates the batch and records the expectation on it.
batch.expect_column_values_to_not_be_null("vendor_id")

# Use the standard workflow to save the suite to file.
# discard_failed_expectations=False keeps expectations in the saved suite even
# when they did not pass against this particular batch.
batch.save_expectation_suite(discard_failed_expectations=False)

# Validate the batch with the "action_list_operator" validation operator and
# take the identifier of the first (and here only) validation result.
results = context.run_validation_operator(
    "action_list_operator",
    assets_to_validate=[batch],
)
validation_result_identifier = results.list_validation_result_identifiers()[0]

# This builds and opens data docs, focused on the validation result above.
context.build_data_docs()
context.open_data_docs(validation_result_identifier)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
How do I generate an expectation suite automatically for multiple CSV/Excel files?