Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
from great_expectations.render.renderer import (
ExpectationSuitePageRenderer
)
from great_expectations.render.types import (
RenderedDocumentContent,
RenderedSectionContent,
RenderedComponentContent,
)
import pandas as pd
class MyCustomExpectationSuitePageRenderer(ExpectationSuitePageRenderer):
    """Expectation-suite page renderer that prepends a "Table Summary" section.

    The extra section contains a header block and a "Data Dictionary" table
    with one row per column listed under ``meta["columns"]`` of the suite:
    column name, description, data-type category and nullity. The last two
    are derived from the suite's expectations.
    """

    # Header labels of the data-dictionary table, in display order.
    _DATA_DICTIONARY_HEADER = ["Column Name", "Description", "Data Type", "Nullity"]

    # Data-type categories for which min/max bounds are reported.
    _NUMERIC_DATA_TYPES = ("integer", "float")

    def __init__(self, **kwargs):
        """Initialise the parent renderer and the type-name lookup table.

        Args:
            **kwargs: Forwarded unchanged to the parent initialiser.
        """
        # The parent initialiser has to run so that whatever state the
        # inherited ``render()`` relies on is set up.
        super().__init__(**kwargs)
        # Maps a data-type category to the set of type names that belong to it.
        self.data_types = {
            "integer": {
                "INTEGER", "int", "INT", "TINYINT", "BYTEINT", "SMALLINT",
                "BIGINT", "IntegerType", "LongType", "DECIMAL",
            },
            "float": {
                "FLOAT", "FLOAT4", "FLOAT8", "DOUBLE_PRECISION", "NUMERIC",
                "FloatType", "DoubleType", "float",
            },
            "string": {"CHAR", "VARCHAR", "TEXT", "StringType", "string", "str"},
            "boolean": {"BOOLEAN", "BOOL", "bool", "BooleanType"},
            "datetime": {
                "DATETIME", "DATE", "TIMESTAMP", "DateType", "TimestampType",
                "datetime64", "Timestamp",
            },
        }

    def render(self, expectations):
        """Render the suite page with the table summary as its first section.

        Args:
            expectations: The expectation suite. It is read through
                ``.get("meta")`` and ``.get("expectations")``.

        Returns:
            The document content produced by the parent renderer, with a
            "Table Summary" section inserted ahead of the existing sections.
        """
        rendered_document_content = super().render(expectations)
        table_summary = RenderedSectionContent(**{
            "section_name": "Table Summary",
            "content_blocks": [
                self._render_data_dictionary_header(expectations),
                self._render_data_dictionary(expectations),
            ],
        })
        rendered_document_content["sections"] = (
            [table_summary] + rendered_document_content["sections"]
        )
        return rendered_document_content

    def _render_data_dictionary_header(self, expectations):
        """Return the "Table Summary" header block.

        ``expectations`` is accepted for signature symmetry with the other
        render helpers; it is not used.
        """
        return RenderedComponentContent(**{
            "content_block_type": "header",
            "header": "Table Summary",
            "styling": {
                "classes": ["col-12"],
                "header": {
                    "classes": ["alert", "alert-secondary"]
                }
            }
        })

    def _render_data_dictionary(self, expectations):
        """Return the "Data Dictionary" table block.

        Each row is ``[column name, description, data type, nullity]``.
        Values that cannot be determined are ``None``.
        """
        columns = self._get_table_columns(expectations)
        expectations_by_column = self._sort_expectations_by_column(expectations)
        data_types = self._get_column_data_types(expectations_by_column)
        nullity = self._get_column_nullity(expectations_by_column)

        # Plain nested lists: every row has exactly four cells regardless of
        # which expectations exist, and the result stays JSON-serialisable.
        table = [
            [
                name,
                (column_meta or {}).get("description"),
                data_types.get(name),
                nullity.get(name),
            ]
            for name, column_meta in columns.items()
        ]
        return RenderedComponentContent(**{
            "content_block_type": "table",
            "header_row": list(self._DATA_DICTIONARY_HEADER),
            "header": "Data Dictionary",
            "table": table,
            "styling": {
                "classes": ["col-12", "table-responsive"],
                "styles": {
                    "margin-top": "20px",
                    "margin-bottom": "20px"
                },
                "body": {
                    "classes": ["table", "table-sm"]
                }
            },
        })

    def _sort_expectations_by_column(self, expectations):
        """Group the suite's expectations by their ``column`` kwarg.

        Returns:
            A dict with one key per column from ``meta["columns"]`` (in that
            order), each mapping to the list of expectations whose
            ``kwargs["column"]`` equals the key, in suite order. Expectations
            for other columns, or without a column, are left out.
        """
        expectations_by_column = {
            column: [] for column in self._get_table_columns(expectations)
        }
        # Single pass over the expectations instead of one scan per column.
        for expectation in expectations.get("expectations") or []:
            column = (expectation.get("kwargs") or {}).get("column")
            if column in expectations_by_column:
                expectations_by_column[column].append(expectation)
        return expectations_by_column

    def _get_column_data_types(self, expectations_by_column):
        """Map every column to its data-type category, or ``None``.

        The category comes from the column's first
        ``expect_column_values_to_be_in_type_list`` expectation: the first
        entry of its ``type_list`` that appears in ``self.data_types``
        decides. Columns without such an expectation, with an empty
        ``type_list``, or with only unrecognised type names map to ``None``,
        so the result always has one entry per input column.
        """
        column_data_types = {}
        for column, column_expectations in expectations_by_column.items():
            kwargs = self._first_expectation_kwargs(
                column_expectations, "expect_column_values_to_be_in_type_list"
            )
            type_list = kwargs.get("type_list") if kwargs is not None else None
            column_data_types[column] = self._lookup_data_type(type_list or [])
        return column_data_types

    def _get_column_min(self, expectations_by_column):
        """Map every column to the ``min_value`` of its min expectation.

        Only columns categorised as "integer" or "float" are looked up; all
        other columns, and numeric columns without an
        ``expect_column_min_to_be_between`` expectation, map to ``None``.
        """
        return self._get_numeric_column_bound(
            expectations_by_column, "expect_column_min_to_be_between", "min_value"
        )

    def _get_column_max(self, expectations_by_column):
        """Map every column to the ``max_value`` of its max expectation.

        Only columns categorised as "integer" or "float" are looked up; all
        other columns, and numeric columns without an
        ``expect_column_max_to_be_between`` expectation, map to ``None``.
        """
        return self._get_numeric_column_bound(
            expectations_by_column, "expect_column_max_to_be_between", "max_value"
        )

    def _get_column_nullity(self, expectations_by_column):
        """Map every column to a human-readable nullity description.

        Based on the column's first ``expect_column_values_to_not_be_null``
        expectation: ``None`` if there is none, ``"never"`` if it has no
        ``mostly`` kwarg, otherwise ``"At most X% missing"``.
        """
        column_nullity = {}
        for column, column_expectations in expectations_by_column.items():
            kwargs = self._first_expectation_kwargs(
                column_expectations, "expect_column_values_to_not_be_null"
            )
            if kwargs is None:
                column_nullity[column] = None
                continue
            mostly = kwargs.get("mostly")
            if mostly is None:
                column_nullity[column] = "never"
            else:
                missing_pct = (1 - mostly) * 100
                # ``.6g`` hides binary floating-point noise such as
                # 5.000000000000004 for mostly=0.95.
                column_nullity[column] = f"At most {missing_pct:.6g}% missing"
        return column_nullity

    def _get_table_columns(self, expectations):
        """Return the ``meta["columns"]`` mapping, or ``{}`` if it is absent."""
        meta = expectations.get("meta") or {}
        return meta.get("columns") or {}

    def _get_numeric_column_bound(self, expectations_by_column, expectation_type, kwarg_name):
        """Shared implementation of ``_get_column_min`` / ``_get_column_max``."""
        data_types = self._get_column_data_types(expectations_by_column)
        bounds = {}
        for column, data_type in data_types.items():
            kwargs = None
            if data_type in self._NUMERIC_DATA_TYPES:
                kwargs = self._first_expectation_kwargs(
                    expectations_by_column[column], expectation_type
                )
            bounds[column] = kwargs.get(kwarg_name) if kwargs is not None else None
        return bounds

    def _lookup_data_type(self, type_list):
        """Return the category of the first recognised name in ``type_list``."""
        for type_name in type_list:
            for category, type_names in self.data_types.items():
                if type_name in type_names:
                    return category
        return None

    @staticmethod
    def _first_expectation_kwargs(column_expectations, expectation_type):
        """Return the kwargs of the first expectation of the given type.

        Returns ``None`` when no expectation matches, and ``{}`` when the
        matching expectation carries no kwargs, so callers can tell the two
        situations apart.
        """
        for expectation in column_expectations:
            if expectation["expectation_type"] == expectation_type:
                return expectation.get("kwargs") or {}
        return None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.