Last active
September 7, 2022 18:05
-
-
Save talagluck/25533944dd8b0525be6b35607f7169e3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from great_expectations.render.renderer import ( | |
ExpectationSuitePageRenderer | |
) | |
from great_expectations.render.types import ( | |
RenderedDocumentContent, | |
RenderedSectionContent, | |
RenderedHeaderContent, | |
RenderedTableContent, | |
) | |
class MyCustomExpectationSuitePageRenderer(ExpectationSuitePageRenderer):
    """Expectation-suite page renderer that adds a "Data Dictionary" table.

    The data dictionary is built from the suite's ``meta["columns"]`` entry,
    which is read as a mapping of column name to a dict that may carry a
    ``"description"`` key. Each column's data type and nullity are derived
    from the suite's ``expect_column_values_to_be_of_type`` and
    ``expect_column_values_to_not_be_null`` expectations.

    Helpers referenced here but not defined in this class (for example
    ``_group_and_order_expectations_by_column`` and
    ``_column_section_renderer``) are presumably inherited from
    ``ExpectationSuitePageRenderer`` -- confirm against the installed
    Great Expectations version.
    """

    def render(self, expectations):
        """Render the expectation suite as a ``RenderedDocumentContent``.

        The "Overview" section starts with the data dictionary header and
        table, followed by the suite header and info blocks, then the
        table-level expectations and suite notes when those helpers return
        something other than ``None``. One further section is rendered per
        column in ``ordered_columns``.

        Args:
            expectations: The expectation suite to render. It must expose
                ``expectation_suite_name``, ``expectations`` and ``meta``.

        Returns:
            A ``RenderedDocumentContent`` describing the whole page.
        """
        columns, ordered_columns = self._group_and_order_expectations_by_column(
            expectations
        )
        expectation_suite_name = expectations.expectation_suite_name

        overview_content_blocks = [
            self._render_data_dictionary_header(expectations),
            self._render_data_dictionary(expectations),
            self._render_expectation_suite_header(),
            self._render_expectation_suite_info(expectations),
        ]

        # Optional blocks: only appended when the helper produced content.
        table_level_expectations_content_block = (
            self._render_table_level_expectations(columns)
        )
        if table_level_expectations_content_block is not None:
            overview_content_blocks.append(table_level_expectations_content_block)

        asset_notes_content_block = self._render_expectation_suite_notes(expectations)
        if asset_notes_content_block is not None:
            overview_content_blocks.append(asset_notes_content_block)

        sections = [
            RenderedSectionContent(
                section_name="Overview",
                content_blocks=overview_content_blocks,
            )
        ]
        # The "_nocolumn" group gets no per-column section; the whole
        # ``columns`` mapping was already handed to the table-level renderer.
        sections.extend(
            self._column_section_renderer.render(expectations=columns[column])
            for column in ordered_columns
            if column != "_nocolumn"
        )

        return RenderedDocumentContent(
            renderer_type="ExpectationSuitePageRenderer",
            page_title=expectation_suite_name,
            expectation_suite_name=expectation_suite_name,
            utm_medium="expectation-suite-page",
            sections=sections,
        )

    def _render_data_dictionary(self, expectations):
        """Build the "Data Dictionary" table content block.

        One row is produced per column listed in ``meta["columns"]``, with
        the cells "Column Name", "Description", "Data Type" and "Nullity".
        When the suite has no column metadata the table body is empty.

        Args:
            expectations: The expectation suite being rendered.

        Returns:
            A ``RenderedTableContent`` whose ``table`` is the ``.values``
            array of the assembled ``pandas.DataFrame``.
        """
        # Look the metadata up once instead of once per derived column.
        table_columns = self._get_table_columns(expectations)
        data_dictionary_df = pd.DataFrame()

        if table_columns:
            expectations_by_column = self._sort_expectations_by_column(expectations)
            data_types = self._get_column_data_types(expectations_by_column)
            nullity = self._get_column_nullity(expectations_by_column)

            data_dictionary_df["Column Name"] = list(table_columns)
            # ``.get`` keeps the page renderable when a column has no
            # "description" entry; the cell is then None, like the other
            # columns' missing values.
            data_dictionary_df["Description"] = [
                column_meta.get("description")
                for column_meta in table_columns.values()
            ]
            data_dictionary_df["Data Type"] = list(data_types.values())
            data_dictionary_df["Nullity"] = list(nullity.values())

        return RenderedTableContent(
            content_block_type="table",
            header_row=["Column Name", "Description", "Data Type", "Nullity"],
            header="Data Dictionary",
            table=data_dictionary_df.values,
            styling={
                "classes": ["col-12", "table-responsive"],
                "styles": {
                    "margin-top": "20px",
                    "margin-bottom": "20px",
                },
                "body": {
                    "classes": ["table", "table-sm"],
                },
            },
        )

    def _render_data_dictionary_header(self, expectations):
        """Build the header block shown above the data dictionary table.

        Args:
            expectations: Unused; kept so the signature matches the other
                ``_render_*`` helpers called from ``render``.

        Returns:
            A ``RenderedHeaderContent`` titled "Data Dictionary".
        """
        return RenderedHeaderContent(
            content_block_type="header",
            header="Data Dictionary",
            styling={
                "classes": ["col-12"],
                "header": {
                    "classes": ["alert", "alert-secondary"],
                },
            },
        )

    def _get_column_data_types(self, expectations_by_column):
        """Map each column to the ``type_`` of its type expectation.

        Args:
            expectations_by_column: Mapping of column name to the list of
                expectations that target that column.

        Returns:
            A dict, in the same key order, of column name to the ``type_``
            kwarg of the column's first
            ``expect_column_values_to_be_of_type`` expectation, or ``None``
            when the column has no such expectation.
        """
        column_data_type_expectations = {}
        for column, column_expectations in expectations_by_column.items():
            type_expectation = next(
                (
                    expectation
                    for expectation in column_expectations
                    if expectation["expectation_type"]
                    == "expect_column_values_to_be_of_type"
                ),
                None,
            )
            column_data_type_expectations[column] = (
                None
                if type_expectation is None
                else type_expectation.kwargs.get("type_")
            )
        return column_data_type_expectations

    def _get_column_nullity(self, expectations_by_column):
        """Map each column to a human-readable nullity description.

        Only the first ``expect_column_values_to_not_be_null`` expectation
        of each column is consulted.

        Args:
            expectations_by_column: Mapping of column name to the list of
                expectations that target that column.

        Returns:
            A dict, in the same key order, of column name to:

            * ``"never"`` when the expectation has no ``mostly`` kwarg,
            * ``"At most <p>% missing"`` when ``mostly`` is set,
            * ``None`` when the column has no not-null expectation.
        """
        column_null_expectations = {}
        # Iterate the mapping directly; deriving the data types first only
        # to obtain the same keys was wasted work.
        for column, column_expectations in expectations_by_column.items():
            null_expectation = next(
                (
                    expectation
                    for expectation in column_expectations
                    if expectation["expectation_type"]
                    == "expect_column_values_to_not_be_null"
                ),
                None,
            )
            if null_expectation is None:
                column_null_expectations[column] = None
                continue

            mostly = null_expectation.kwargs.get("mostly")
            if mostly is None:
                column_null_expectations[column] = "never"
            else:
                # ":g" trims binary floating-point noise, so mostly=0.95
                # reads "5%" rather than "5.000000000000004%".
                column_null_expectations[column] = (
                    f"At most {(1 - mostly) * 100:g}% missing"
                )
        return column_null_expectations

    def _sort_expectations_by_column(self, expectations):
        """Group the suite's expectations by the column they target.

        Only columns listed in ``meta["columns"]`` are included, in that
        order. An expectation belongs to a column when its ``column`` kwarg
        equals the column name.

        Args:
            expectations: The expectation suite being rendered.

        Returns:
            A dict of column name to list of matching expectations; empty
            when the suite has no column metadata.
        """
        # Fall back to an empty mapping so a suite without "columns"
        # metadata yields {} instead of raising AttributeError on None.
        table_columns = self._get_table_columns(expectations) or {}
        suite_expectations = expectations.expectations
        return {
            column: [
                expectation
                for expectation in suite_expectations
                if expectation.kwargs.get("column") == column
            ]
            for column in table_columns
        }

    def _get_table_columns(self, expectations):
        """Return the suite's ``meta["columns"]`` entry, or ``None`` if absent."""
        return expectations.meta.get("columns")
Done - thank you, @yassineAlouini ! Just a heads-up, this code is fairly out of date and due for a refresh.
Will keep this in mind, thanks.
I'm running into issues getting this to work.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for this extension!
One small suggestion: change the file extension to `.py` so that the code is syntax-highlighted in the gist.