Skip to content

Instantly share code, notes, and snippets.

@deaktator
deaktator / numpy_weighted_group_by.py
Last active September 7, 2022 08:24
NumPy group by with weighting
# Requirements: conda create -n dev python=3.7 numpy
from typing import Callable, Optional, Tuple, TypeVar
import numpy as np
Y = TypeVar("Y")
# This could be a np.recarray. See examples in compressed docstr.
NpArray = TypeVar("NpArray", bound=np.ndarray)
@deaktator
deaktator / clf_metrics_score_hist_call.py
Last active February 27, 2022 00:07
Calling code for "Classification Metrics for Score Histograms"
# Build the binned confusion-matrix statistics from the predicted scores,
# then derive both summary metrics from that single result.
# NOTE(review): `cm_stats_by_threshold_binned`, `CmStatsBinned` and `yp` are
# defined outside this snippet — confirm their contracts there.
cm = cm_stats_by_threshold_binned(yp)
auc, ap = (
    CmStatsBinned.auc(cm),  # 0.708
    CmStatsBinned.ap(cm),   # 0.372
)
@deaktator
deaktator / clf_metrics_score_hist_support.py
Last active February 26, 2022 23:17
Supporting code for "Classification Metrics for Score Histograms"
import matplotlib.pyplot as plt
import numpy as np
def create_data(dist, n=1000, random_state=None):
    """Sample scores from ``dist`` and draw one Bernoulli label per score.

    Args:
        dist: Distribution object exposing
            ``rvs(size=..., random_state=...)``, e.g. a frozen
            ``scipy.stats`` distribution.
        n: Number of samples to draw.
        random_state: Seed for a private ``np.random.RandomState``.
            ``None`` seeds the generator from OS entropy.

    Returns:
        A tuple ``(yt, yp)`` where ``yp`` holds the ``n`` sampled scores and
        ``yt`` holds the 0/1 labels drawn with success probability
        ``clip(yp, 0, 1)``.
    """
    # A private generator produces the same stream that
    # ``np.random.seed(random_state)`` would, but leaves the process-wide
    # NumPy RNG untouched so callers' own seeding is not clobbered.
    rng = np.random.RandomState(random_state)
    yp = dist.rvs(size=n, random_state=rng)
    # NOTE: label noise is disabled; to enable it, add e.g.
    # ``rng.normal(0, 0.05, n)`` to ``yp`` before clipping.
    # Clipping keeps every score a valid Bernoulli parameter.
    yt = rng.binomial(1, np.clip(yp, 0, 1))
    return yt, yp
@deaktator
deaktator / clf_metrics_score_hist_sklearn_metrics.py
Last active February 26, 2022 22:45
scikit-learn classification metrics for "Classification Metrics for Score Histograms"
import sklearn.metrics

# Reference metrics computed by scikit-learn directly on the labels `yt` and
# scores `yp` (both defined outside this snippet).

# Curves: precision/recall first, then ROC; the thresholds are discarded.
sk_pre, sk_rec, _ = sklearn.metrics.precision_recall_curve(yt, yp)
sk_fpr, sk_tpr, _ = sklearn.metrics.roc_curve(yt, yp)

# Scalar summaries of the two curves.
sk_ap = sklearn.metrics.average_precision_score(yt, yp)
sk_auc = sklearn.metrics.roc_auc_score(yt, yp)

print(f"AUC: {sk_auc:0.3} AP: {sk_ap:0.3}")
@deaktator
deaktator / clf_metrics_score_hist_dist.py
Created February 26, 2022 22:26
Distribution set up for "Classification Metrics for Score Histograms" post.
# `import scipy` alone does not guarantee the `stats` submodule is loaded on
# older SciPy releases, so import it explicitly.
import scipy.stats

# Create a label distribution and sample "predictions" and "ground truth"
dist = scipy.stats.beta(2, 8)
num_samples = 10_000_000
# Fixed seed so the sampled data set is reproducible.
# NOTE(review): `create_data` is defined in the supporting snippet.
yt, yp = create_data(dist, n=num_samples, random_state=42)
@deaktator
deaktator / clf_metrics_score_hist.py
Last active February 26, 2022 22:18
Classification Metrics for Score Histograms
from typing import List, Optional, Tuple, Union
import numpy as np
import pandas as pd
import scipy
def binned_cm_stats(histogram: Union[np.ndarray, List[int]],
bins: Union[np.ndarray, List[float]]) -> pd.DataFrame:
"""Produce confusion matrix statistics for a histogram of probability estimates.
@deaktator
deaktator / no_joint_during_sampling.py
Created March 17, 2021 04:38
What happens when you try to update a JPD but have only partial coordinate information
# ----------------------------------------------------------------------------
# R. M. Deak
#
# Illustrates the problem of trying to estimate a joint distribution without
# the ability to probe points, but instead only lines. This means that
# instead of updating the density at particular (point) locations, it is
# updated along an entire axis, given a coordinate along the other axis. In
# the 2x2 example, the result is that each cell in the joint is an average
# of itself and its neighbors.
#
@deaktator
deaktator / lazy_batched_apply.py
Last active September 18, 2020 17:29
Lazily applies a function to mini-batches (Python) using itertools and more_itertools
# pip install more-itertools numpy   (itertools is part of the standard library)
from typing import Callable, Iterable, Iterator, List, TypeVar, Union
import itertools
import more_itertools
A = TypeVar("A")
B = TypeVar("B")
def lazy_batched_apply(
@deaktator
deaktator / LightGBM_3.x.x_partial_fitting.py
Last active August 17, 2020 23:46
Mimicking the process of integrating LightGBM 3.x init_model functionality with Ray Tune (or some other hyperparameter tuning loop).
# ===========================================================================
# LightGBM 3.0.0: Using init_model parameter
# ===========================================================================
#
# INSTALL: pip install lightgbm==3.0.0rc1
#
# The goal is to show the initial steps of how to integrate LightGBM and
# Ray Tune.
from copy import copy
@deaktator
deaktator / milfp_1_4.py
Last active August 5, 2019 04:18
Binary problem formulation for linear-fractional 0-1 programming after applying the Charnes-Cooper transformation
# vvvvvvvvvvvvvvvvvvvvvvvvv Binary Problem 3 def vvvvvvvvvvvvvvvvvvvvvvvvv
# Non-negative continuous scalar.
# NOTE(review): presumably the Charnes-Cooper scaling variable — confirm
# against the constraints that follow.
t = pulp.LpVariable('t', lowBound=0, cat='Continuous')
# One non-negative continuous variable per country.
y = [
    pulp.LpVariable(f'y{i}', lowBound=0, cat='Continuous')
    for i in range(num_countries)
]
# One binary variable per country.
z = [pulp.LpVariable(f'z{i}', cat='Binary') for i in range(num_countries)]
# PuLP does not permit spaces in problem names (it warns and converts them
# to underscores), so use the underscore form directly.
model = pulp.LpProblem("binary_best_countries", pulp.LpMaximize)
model += dot(runners * qs, y)  # obj (numerator of orig prob)