Skip to content

Instantly share code, notes, and snippets.

@talagluck
talagluck / glaredb_node_delta.js
Created March 20, 2024 14:58
Read from a delta table in Node.js with GlareDB and write the output to Polars
> npm install nodejs-polars
> npm install @glaredb/glaredb
const pl = require("nodejs-polars");
const glaredb = require("@glaredb/glaredb");
const con = await glaredb.connect();
const df = await (
await con.sql(
"select * from delta_scan('path/to/delta/table')",
)
@talagluck
talagluck / ge_column_mapping.py
Created January 22, 2021 15:31
ge_column_mapping.py
# The mapping method
def map_column_names(expectation_suite, mapping_dict):
for expectation in expectation_suite.expectations:
if 'column' in expectation.get_domain_kwargs():
source_col = expectation.get_domain_kwargs()['column']
if source_col in mapping_dict:
target_col = mapping_dict[source_col]
print('column:', source_col, ' mapping to:', target_col)
expectation.patch('replace', '/column', target_col)
else:
import pandas as pd
from great_expectations.render.renderer import (
ExpectationSuitePageRenderer
)
from great_expectations.render.types import (
RenderedDocumentContent,
RenderedSectionContent,
RenderedHeaderContent,
RenderedTableContent,
import pandas as pd
from great_expectations.render.renderer import (
ExpectationSuitePageRenderer
)
from great_expectations.render.types import (
RenderedDocumentContent,
RenderedSectionContent,
RenderedHeaderContent,
RenderedTableContent,
def _render_data_dictionary(self, expectations):
data_dictionary_df = pd.DataFrame()
data_dictionary_df["Column Name"] = [i for i in self._get_table_columns(expectations)]
data_dictionary_df["Description"] = [i["description"] for i in self._get_table_columns(expectations).values()]
expectations_by_column = self._sort_expectations_by_column(expectations)
data_dictionary_df["Data Type"] = [i for i in self._get_column_data_types(expectations_by_column).values()]
return RenderedComponentContent(**{
"content_block_type": "table",
def _get_column_data_types(self,expectations_by_column):
column_data_type_expectations = {}
for k,v in expectations_by_column.items():
expectation = [i for i in v if i["expectation_type"] == "expect_column_values_to_be_in_type_list"]
if len(expectation)>0:
type_list = expectation[0].get("kwargs").get("type_list")
if len(type_list) > 0:
column_data_type_expectations[k] = type_list[0]
else:
column_data_type_expectations[k] = type_list
def _sort_expectations_by_column(self, expectations):
    """Group the suite's expectations by the column they target.

    Args:
        expectations: dict-like suite. Its ``"expectations"`` entry is
            read as an iterable of dict-like expectation configs, and the
            column names come from ``self._get_table_columns(expectations)``.

    Returns:
        A dict with one key per column reported by
        ``self._get_table_columns``, in that iteration order. Each value is
        the list of expectation configs whose ``kwargs["column"]`` equals
        that column, in their original order. Columns without matching
        expectations map to an empty list; expectations that name no
        listed column (or have no ``column`` kwarg) are left out.
    """
    # Treat a missing/None "expectations" entry or missing column metadata
    # as empty instead of failing on None.
    suite_expectations = expectations.get("expectations") or []
    columns = self._get_table_columns(expectations) or {}

    return {
        column: [
            expectation
            for expectation in suite_expectations
            # An expectation may carry no kwargs at all; fall back to an
            # empty mapping so the lookup yields None rather than raising.
            if (expectation.get("kwargs") or {}).get("column") == column
        ]
        for column in columns
    }
from great_expectations.render.renderer import (
ExpectationSuitePageRenderer
)
from great_expectations.render.types import (
RenderedDocumentContent,
RenderedSectionContent,
RenderedComponentContent,
)
import pandas as pd
def _get_table_columns(self, expectations):
    """Return the column metadata stored under ``meta -> columns``.

    Args:
        expectations: dict-like suite; only its ``"meta"`` entry is read.

    Returns:
        The value stored at ``expectations["meta"]["columns"]``. When
        ``meta`` or ``columns`` is missing (or empty/None) an empty dict is
        returned, so callers can safely iterate it or call ``.keys()`` /
        ``.values()`` on the result.
    """
    # Each lookup falls back to an empty mapping so a suite without the
    # metadata yields "no columns" instead of an AttributeError on None.
    meta = expectations.get("meta") or {}
    return meta.get("columns") or {}
from great_expectations.render.renderer import (
ExpectationSuitePageRenderer
)
from great_expectations.render.types import (
RenderedDocumentContent,
RenderedSectionContent,
RenderedComponentContent,
)
class MyCustomExpectationSuitePageRenderer(ExpectationSuitePageRenderer):