Skip to content

Instantly share code, notes, and snippets.

@achad4
achad4 / gist:d3d08364a1e197b77b5da9b292affffa
Created July 12, 2021 17:29
Python code to generate synthetic lease data and simulate lifecycles from pre-trained model
now = datetime.now()
start = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
start = start + timedelta(days=31)
stop = start + timedelta(days=30 * 36)
synth_certs = pd.DataFrame()
for dt in rrule.rrule(rrule.MONTHLY, dtstart=start, until=stop):
#THE SEASONALITY CURVE IS MEASURED (in SQL) DIRECTLY FROM OUR DATA
cohort_size = (
cert_ramp_seasonality_df[
now = datetime.now()
start = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
start = start + timedelta(days=31)
stop = start + timedelta(days=30 * 36)
synth_certs = pd.DataFrame()
for dt in rrule.rrule(rrule.MONTHLY, dtstart=start, until=stop):
#THE SEASONALITY CURVE IS MEASURED (in SQL) DIRECTLY FROM OUR DATA
cohort_size = (
cert_ramp_seasonality_df[
now = datetime.now()
start = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
start = start + timedelta(days=31)
stop = start + timedelta(days=30 * 36)
synth_certs = pd.DataFrame()
for dt in rrule.rrule(rrule.MONTHLY, dtstart=start, until=stop):
#THE SEASONALITY CURVE IS MEASURED (in SQL) DIRECTLY FROM OUR DATA
cohort_size = (
cert_ramp_seasonality_df[
SELECT
lease_dim_id,
reporting_month AS simulated_default_month
FROM
(
--
SELECT
*,
ROW_NUMBER() OVER(PARTITION BY certificate_dim_id ORDER BY (RANDOM() * COALESCE(perc_losses_expected+in_month,0)) DESC) AS rnk
FROM ll_fact_lease_ts ts
now = datetime.now()
start = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
start = start + timedelta(days=31)
stop = start + timedelta(days=30 * 36)
synth_certs = pd.DataFrame()
for dt in rrule.rrule(rrule.MONTHLY, dtstart=start, until=stop):
#THE SEASONALITY CURVE IS MEASURED (in SQL) DIRECTLY FROM OUR DATA
cohort_size = (
cert_ramp_seasonality_df[
@achad4
achad4 / metrics_query_example.sql
Created February 14, 2022 14:00
Example of metrics query for article
WITH month_0 AS
(
SELECT user_id,
metric_a
FROM analytics_table
WHERE execution_date = < month_0 >
),
month_1 AS
(
SELECT user_id,
@achad4
achad4 / customer_spend_anamoly_detection.sql
Last active March 31, 2022 13:20
Ex. Flagging outliers based on Z-scores
WITH transaction AS (
SELECT transaction_id,
customer_id,
state,
amount_spent_usd
FROM FACT_TABLE_TRANSACTION
),
customer_spend AS (
SELECT customer_id,
state,
@achad4
achad4 / customer_spend_anamoly_detection.sql
Last active October 20, 2023 12:32
Ex. Flagging outliers in SQL
WITH transaction AS (
SELECT transaction_id,
customer_id,
state,
amount_spent_usd
FROM < FACT_TABLE_TRANSACTION >
),
customer_spend AS (
SELECT customer_id,
state,
@achad4
achad4 / simple_ml_dag.py
Last active April 7, 2022 22:24
Example of ML DAG using Python operator
from airflow.models import Variable
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
def train_model(lookback_days=30: int):
"""
Trains a model using data from the past <lookback_days> and persists to a model store
"""
...
from airflow.models import Variable
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
MODEL_ENTRY_POINT = "customer_model.py"
# This dynamic configuration mechanism is not ideal but makes for an easy demo
model_version = Variable.get("model_version")