Skip to content

Instantly share code, notes, and snippets.

@PaoloLeonard
PaoloLeonard / Dockerfile
Created December 14, 2023 19:04
Streamlit docker app
# app/Dockerfile
FROM python:3.11-slim
WORKDIR /app
RUN apt-get update && apt-get install -y \
build-essential \
curl \
software-properties-common \
@PaoloLeonard
PaoloLeonard / dq_app.py
Last active December 14, 2023 18:52
rootsconf-dq
import streamlit as st
import yaml, json
import matplotlib.pyplot as plt
dq_score, dashboards = st.tabs(['Data quality score', 'Dashboards'])
with dq_score:
st.title(f"""Data quality score""")
schema, checks = {}, {}
@PaoloLeonard
PaoloLeonard / expectation_implementation.py
Last active October 13, 2021 12:48
Expectation implementation for the GE table expectation tutorial.
from copy import deepcopy
from typing import Dict, Tuple, Any, Optional, Callable, List
from great_expectations.core import ExpectationConfiguration
from great_expectations.execution_engine import (
ExecutionEngine
)
from great_expectations.expectations.expectation import TableExpectation
from great_expectations.exceptions.exceptions import InvalidExpectationKwargsError
@PaoloLeonard
PaoloLeonard / table_metric.py
Last active September 8, 2021 20:22
Table metric implementation for the GE table expectation tutorial.
from typing import Dict, Tuple, Any
from great_expectations.core.batch_spec import PathBatchSpec
from great_expectations.execution_engine import (
SparkDFExecutionEngine,
PandasExecutionEngine
)
from great_expectations.expectations.metrics.metric_provider import metric_value
from great_expectations.expectations.metrics.table_metric_provider import (
TableMetricProvider,
@PaoloLeonard
PaoloLeonard / comparator.py
Created September 8, 2021 18:11
Comparator implementation for the GE table expectation tutorial.
from enum import Enum, auto
from statistics import mean
class SupportedComparisonEnum(Enum):
"""Enumeration of the currently supported comparison types."""
# Member values are generated by auto().
ABSOLUTE = auto()
MEAN = auto()
@PaoloLeonard
PaoloLeonard / expect_table_row_count_to_be_more_than_others.py
Last active October 28, 2022 11:06
Full implementation of a custom table expectation that compares the row count of the dataset under consideration to the row counts of other datasets, with the possibility of using different comparison keys.
"""
Custom table expectation which checks whether the row count is greater than the row count of other tables.
There are different ways to compare the row counts:
* With absolute values: if the row count of any of the other tables is greater than the current row count, then the
validation fails.
* With mean values: if the mean of the other tables' row counts is greater than the current row count, then
the validation fails.
"""
from copy import deepcopy