This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Requirements: conda create -n dev python=3.7 numpy | |
from typing import Callable, Optional, Tuple, TypeVar | |
import numpy as np | |
Y = TypeVar("Y") | |
# This could be a np.recarray. See examples in compressed docstr. | |
NpArray = TypeVar("NpArray", bound=np.ndarray) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Binned confusion-matrix statistics for the scores `yp`. Both helpers are
# defined outside this preview, so the descriptions below are inferred from
# their names only -- TODO confirm against their definitions.
cm = cm_stats_by_threshold_binned(yp) | |
# Presumably the area under the ROC curve; the recorded result is shown inline.
auc = CmStatsBinned.auc(cm) # 0.708 | |
# Presumably the average precision; the recorded result is shown inline.
ap = CmStatsBinned.ap(cm) # 0.372 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
import numpy as np | |
def create_data(dist, n=1000, random_state=None, noise_std=0.0):
    """Sample synthetic probability scores and matching binary labels.

    Scores ``yp`` are drawn from ``dist``; each label in ``yt`` is a Bernoulli
    draw whose success probability is the corresponding score (plus optional
    Gaussian noise, clipped to [0, 1]).

    A private ``np.random.RandomState`` is used instead of reseeding the
    process-wide NumPy generator, so calling this function does not disturb
    random numbers drawn elsewhere. For a given integer seed the generated
    values are the same as with the legacy ``np.random.seed(seed)`` approach.

    Args:
        dist: Distribution object exposing
            ``rvs(size=..., random_state=...)``, e.g. a frozen
            ``scipy.stats`` distribution such as ``scipy.stats.beta(2, 8)``.
            Its samples are used as probabilities; values outside [0, 1] are
            clipped before the labels are drawn.
        n: Number of samples to generate.
        random_state: Seed for the private generator. ``None`` seeds it from
            OS entropy (non-reproducible).
        noise_std: Standard deviation of zero-mean Gaussian noise added to
            the scores before the labels are drawn. The default ``0.0`` adds
            no noise and draws nothing extra from the generator.

    Returns:
        Tuple ``(yt, yp)``: ``yt`` is an array of 0/1 labels and ``yp`` the
        array of noise-free scores, both of length ``n``.
    """
    rng = np.random.RandomState(random_state)
    yp = dist.rvs(size=n, random_state=rng)

    label_prob = yp
    if noise_std:
        # Noise only perturbs the probability used for the labels; the
        # returned scores stay noise-free.
        label_prob = yp + rng.normal(0, noise_std, n)

    yt = rng.binomial(1, np.clip(label_prob, 0, 1))
    return yt, yp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sklearn.metrics | |
# Reference metrics computed with scikit-learn from the labels `yt` and the
# scores `yp` (both defined outside this snippet).
# Precision/recall pairs; the returned thresholds array is discarded.
sk_pre, sk_rec, _ = sklearn.metrics.precision_recall_curve(yt, yp) | |
# False/true positive rates for the ROC curve; thresholds are discarded.
sk_fpr, sk_tpr, _ = sklearn.metrics.roc_curve(yt, yp) | |
# Scalar summaries: average precision and area under the ROC curve.
sk_ap = sklearn.metrics.average_precision_score(yt, yp) | |
sk_auc = sklearn.metrics.roc_auc_score(yt, yp) | |
# The `0.3` format spec prints each value with three significant digits.
print(f"AUC: {sk_auc:0.3} AP: {sk_ap:0.3}") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create a label distribution and sample "predictions" and "ground truth" | |
# Beta(2, 8) produces scores in [0, 1] with mean 2 / (2 + 8) = 0.2.
# NOTE(review): assumes `scipy.stats` is importable here and `create_data`
# is already defined -- neither is visible in this snippet.
dist = scipy.stats.beta(2, 8) | |
# Ten million samples.
num_samples = 10_000_000 | |
# A fixed seed is passed as `random_state`, presumably for reproducibility.
yt, yp = create_data(dist, n=num_samples, random_state=42) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import List, Optional, Tuple, Union | |
import numpy as np | |
import pandas as pd | |
import scipy | |
def binned_cm_stats(histogram: Union[np.ndarray, List[int]], | |
bins: Union[np.ndarray, List[float]]) -> pd.DataFrame: | |
"""Produce confusion matrix statistics for a histogram of probability estimates. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ---------------------------------------------------------------------------- | |
# R. M. Deak | |
# | |
# Illustrates the problem of trying to estimate a joint distribution without | |
# the ability to probe points, but instead only lines. This means that | |
# instead of updating the density at particular (point) locations, it is | |
# updated along an entire axis, given a coordinate along the other axis. In | |
# the 2x2 example, the result is that each cell in the joint is an average | |
# of itself and its neighbors. | |
# |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip install more-itertools numpy  (itertools is part of the standard library) | |
from typing import Callable, Iterable, Iterator, List, TypeVar, Union | |
import itertools | |
import more_itertools | |
A = TypeVar("A") | |
B = TypeVar("B") | |
def lazy_batched_apply( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# =========================================================================== | |
# LightGBM 3.0.0: Using init_model parameter | |
# =========================================================================== | |
# | |
# INSTALL: pip install lightgbm==3.0.0rc1 | |
# | |
# The goal is to show the initial steps of how to integrate LightGBM and | |
# Ray Tune. | |
from copy import copy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# vvvvvvvvvvvvvvvvvvvvvvvvv Binary Problem 3 def vvvvvvvvvvvvvvvvvvvvvvvvv | |
# `dot`, `runners`, `qs` and `num_countries` are defined outside this snippet.
# Scalar continuous variable constrained to be >= 0.
t = pulp.LpVariable('t', lowBound=0, cat='Continuous') | |
# One continuous variable >= 0 per country.
y = [ | |
pulp.LpVariable(f'y{i}', lowBound=0, cat='Continuous') | |
for i in range(num_countries) | |
] | |
# One binary (0/1) variable per country.
z = [ pulp.LpVariable(f'z{i}', cat='Binary') for i in range(num_countries) ] | |
# Maximization problem.
# NOTE(review): the problem name contains spaces; PuLP is believed to warn
# about this and substitute underscores -- confirm, and consider renaming.
model = pulp.LpProblem("binary best countries", pulp.LpMaximize) | |
# Adds the objective; presumably `dot` returns a PuLP linear expression.
model += dot(runners * qs, y) # obj (numerator of orig prob) |
NewerOlder