Skip to content

Instantly share code, notes, and snippets.

@talagluck
Last active September 7, 2022 18:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save talagluck/25533944dd8b0525be6b35607f7169e3 to your computer and use it in GitHub Desktop.
Save talagluck/25533944dd8b0525be6b35607f7169e3 to your computer and use it in GitHub Desktop.
import pandas as pd
from great_expectations.render.renderer import (
ExpectationSuitePageRenderer
)
from great_expectations.render.types import (
RenderedDocumentContent,
RenderedSectionContent,
RenderedHeaderContent,
RenderedTableContent,
)
class MyCustomExpectationSuitePageRenderer(ExpectationSuitePageRenderer):
    """Expectation suite page renderer that adds a "Data Dictionary" table.

    The data dictionary is placed at the top of the "Overview" section and
    lists one row per column declared under the suite's ``meta["columns"]``
    mapping (column name -> metadata dict with an optional ``"description"``).
    The "Data Type" and "Nullity" cells are derived from the suite's
    ``expect_column_values_to_be_of_type`` and
    ``expect_column_values_to_not_be_null`` expectations.
    """

    def render(self, expectations):
        """Render the whole expectation suite page.

        Args:
            expectations: The expectation suite to render. This method reads
                its ``expectation_suite_name``; the helpers it calls also read
                ``meta`` and ``expectations``.

        Returns:
            A ``RenderedDocumentContent`` whose first section is "Overview"
            (data dictionary, suite header/info, then optional table-level
            expectations and notes), followed by one section per column.
        """
        columns, ordered_columns = self._group_and_order_expectations_by_column(
            expectations
        )
        expectation_suite_name = expectations.expectation_suite_name

        # The data dictionary blocks come first so they appear above the
        # standard suite header and info blocks.
        overview_content_blocks = [
            self._render_data_dictionary_header(expectations),
            self._render_data_dictionary(expectations),
            self._render_expectation_suite_header(),
            self._render_expectation_suite_info(expectations),
        ]

        table_level_expectations_content_block = (
            self._render_table_level_expectations(columns)
        )
        if table_level_expectations_content_block is not None:
            overview_content_blocks.append(table_level_expectations_content_block)

        asset_notes_content_block = self._render_expectation_suite_notes(expectations)
        if asset_notes_content_block is not None:
            overview_content_blocks.append(asset_notes_content_block)

        sections = [
            RenderedSectionContent(
                **{
                    "section_name": "Overview",
                    "content_blocks": overview_content_blocks,
                }
            )
        ]
        # "_nocolumn" is skipped here; its group is what gets passed to the
        # table-level renderer above via ``columns``.
        sections += [
            self._column_section_renderer.render(expectations=columns[column])
            for column in ordered_columns
            if column != "_nocolumn"
        ]

        return RenderedDocumentContent(
            **{
                "renderer_type": "ExpectationSuitePageRenderer",
                "page_title": expectation_suite_name,
                "expectation_suite_name": expectation_suite_name,
                "utm_medium": "expectation-suite-page",
                "sections": sections,
            }
        )

    def _render_data_dictionary(self, expectations):
        """Build the data dictionary table content block.

        Args:
            expectations: The expectation suite being rendered.

        Returns:
            A ``RenderedTableContent`` with the columns "Column Name",
            "Description", "Data Type" and "Nullity". The table has no rows
            when the suite declares no columns in ``meta["columns"]``.
        """
        data_dictionary_df = pd.DataFrame()
        # Look the column metadata up once instead of once per table column.
        table_columns = self._get_table_columns(expectations)
        if table_columns:
            expectations_by_column = self._sort_expectations_by_column(expectations)
            data_types = self._get_column_data_types(expectations_by_column)
            nullity = self._get_column_nullity(expectations_by_column)

            data_dictionary_df["Column Name"] = list(table_columns)
            # A column without a description (or with empty metadata) gets an
            # empty cell rather than aborting the whole page with a KeyError.
            data_dictionary_df["Description"] = [
                (column_meta or {}).get("description")
                for column_meta in table_columns.values()
            ]
            data_dictionary_df["Data Type"] = list(data_types.values())
            data_dictionary_df["Nullity"] = list(nullity.values())

        return RenderedTableContent(
            **{
                "content_block_type": "table",
                "header_row": ["Column Name", "Description", "Data Type", "Nullity"],
                "header": "Data Dictionary",
                # Hand over plain nested lists instead of a NumPy array so the
                # content block holds only built-in Python containers.
                "table": data_dictionary_df.values.tolist(),
                "styling": {
                    "classes": ["col-12", "table-responsive"],
                    "styles": {
                        "margin-top": "20px",
                        "margin-bottom": "20px",
                    },
                    "body": {
                        "classes": ["table", "table-sm"],
                    },
                },
            }
        )

    def _render_data_dictionary_header(self, expectations):
        """Build the "Data Dictionary" header content block.

        Args:
            expectations: Unused; kept so the signature matches the other
                ``_render_*`` helpers called from ``render``.

        Returns:
            A ``RenderedHeaderContent`` with the text "Data Dictionary".
        """
        return RenderedHeaderContent(
            **{
                "content_block_type": "header",
                "header": "Data Dictionary",
                "styling": {
                    "classes": ["col-12"],
                    "header": {
                        "classes": ["alert", "alert-secondary"],
                    },
                },
            }
        )

    def _get_column_data_types(self, expectations_by_column):
        """Map each column to the type named by its type expectation.

        Args:
            expectations_by_column: Mapping of column name to the list of
                expectations for that column.

        Returns:
            A dict with the same keys, in the same order. Each value is the
            ``type_`` kwarg of the column's first
            ``expect_column_values_to_be_of_type`` expectation, or ``None``
            when the column has no such expectation.
        """
        column_data_type_expectations = {}
        for column, column_expectations in expectations_by_column.items():
            type_expectation = next(
                (
                    expectation
                    for expectation in column_expectations
                    if expectation["expectation_type"]
                    == "expect_column_values_to_be_of_type"
                ),
                None,
            )
            if type_expectation is None:
                column_data_type_expectations[column] = None
            else:
                column_data_type_expectations[column] = type_expectation.kwargs.get(
                    "type_"
                )
        return column_data_type_expectations

    def _get_column_nullity(self, expectations_by_column):
        """Map each column to a human-readable description of allowed nulls.

        Args:
            expectations_by_column: Mapping of column name to the list of
                expectations for that column.

        Returns:
            A dict with the same keys, in the same order. Each value is
            ``None`` when the column has no
            ``expect_column_values_to_not_be_null`` expectation, ``"never"``
            when that expectation has no ``mostly`` kwarg, and otherwise
            ``"At most N% missing"`` where N is ``(1 - mostly) * 100``.
        """
        column_null_expectations = {}
        # Iterate the input directly; the keys are all that is needed here, so
        # there is no reason to compute the data types first.
        for column, column_expectations in expectations_by_column.items():
            null_expectation = next(
                (
                    expectation
                    for expectation in column_expectations
                    if expectation["expectation_type"]
                    == "expect_column_values_to_not_be_null"
                ),
                None,
            )
            if null_expectation is None:
                column_null_expectations[column] = None
                continue

            mostly = null_expectation.kwargs.get("mostly")
            if mostly is None:
                column_null_expectations[column] = "never"
            else:
                # ":g" trims binary floating-point noise, e.g. mostly=0.95
                # would otherwise print as "5.000000000000004".
                missing_percent = (1 - mostly) * 100
                column_null_expectations[column] = (
                    f"At most {missing_percent:g}% missing"
                )
        return column_null_expectations

    def _sort_expectations_by_column(self, expectations):
        """Group the suite's expectations by the declared table columns.

        Args:
            expectations: The expectation suite being rendered.

        Returns:
            A dict keyed by every column name in ``meta["columns"]`` (in that
            order). Each value is the list of expectations whose ``column``
            kwarg equals that name. The dict is empty when the suite declares
            no columns.
        """
        # ``meta`` may have no "columns" entry at all; treat that as "no
        # columns" instead of failing on ``None``.
        table_columns = self._get_table_columns(expectations) or {}
        suite_expectations = expectations.expectations
        return {
            column: [
                expectation
                for expectation in suite_expectations
                if expectation.kwargs.get("column") == column
            ]
            for column in table_columns
        }

    def _get_table_columns(self, expectations):
        """Return the suite's ``meta["columns"]`` value, or ``None`` if absent."""
        return expectations.meta.get("columns")
@yassineAlouini
Copy link

Thanks for this extension!

One small suggestion: change the file extension to .py so that the code is colored in the gist.

@talagluck
Copy link
Author

Done - thank you, @yassineAlouini! Just a heads-up: this code is fairly out of date and due for a refresh.

@yassineAlouini
Copy link

Will keep this in mind, thanks.

@nguyenann13
Copy link

I'm running into issues getting this to work.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment