Ryan Deak deaktator

## numpy_weighted_group_by.py
# Requirements:  conda create -n dev python=3.7 numpy

from typing import Callable, Optional, Tuple, TypeVar
import numpy as np

# Generic type variable with no bound and no constraints.
Y = TypeVar("Y")

# This could be a np.recarray.  See examples in compressed docstr.
# The bound means the variable stands for np.ndarray or any subclass of it.
NpArray = TypeVar("NpArray", bound=np.ndarray)

## clf_metrics_score_hist_call.py
# Pass the scores `yp` to `cm_stats_by_threshold_binned`, then feed its result
# to `CmStatsBinned.auc` and `CmStatsBinned.ap`.  All three names (and `yp`)
# are defined outside this snippet.
# NOTE(review): the trailing numbers look like values from one of the author's
# runs -- confirm before relying on them.
cm = cm_stats_by_threshold_binned(yp)
auc = CmStatsBinned.auc(cm) # 0.708
ap = CmStatsBinned.ap(cm)   # 0.372

## clf_metrics_score_hist_support.py
import matplotlib.pyplot as plt
import numpy as np


def create_data(dist, n=1000, random_state=None):
    """Sample scores from ``dist`` and draw one binary label per score.

    Args:
        dist: Object exposing ``rvs(size=...)``, e.g. a frozen
            ``scipy.stats`` distribution.
        n: Number of samples to draw.
        random_state: Seed handed to ``np.random.seed``.  Note that this
            reseeds NumPy's global legacy generator.

    Returns:
        Tuple ``(yt, yp)``: the sampled 0/1 labels and the sampled scores.
    """
    np.random.seed(random_state)
    scores = dist.rvs(size=n)

    # Noise is currently disabled (all zeros).  Replace with
    # np.random.normal(0, 0.05, n) to perturb the scores before labelling.
    perturbation = np.zeros(n)

    # Each label is a single Bernoulli trial whose success probability is the
    # (perturbed) score clipped into [0, 1].
    success_prob = np.clip(scores + perturbation, 0, 1)
    labels = np.random.binomial(1, success_prob)
    return labels, scores

## clf_metrics_score_hist_sklearn_metrics.py
import sklearn.metrics

# Reference metrics computed by scikit-learn from the labels `yt` and the
# scores `yp` (both defined outside this snippet).  The third value returned
# by each curve function is discarded.
sk_pre, sk_rec, _ = sklearn.metrics.precision_recall_curve(yt, yp)
sk_fpr, sk_tpr, _ = sklearn.metrics.roc_curve(yt, yp)
sk_ap = sklearn.metrics.average_precision_score(yt, yp)
sk_auc = sklearn.metrics.roc_auc_score(yt, yp)
# The `0.3` format spec prints each metric with three significant digits.
print(f"AUC: {sk_auc:0.3}  AP: {sk_ap:0.3}")

## clf_metrics_score_hist_dist.py
# Create a label distribution and sample "predictions" and "ground truth"
# NOTE(review): `scipy.stats` is used here but this snippet contains no import
# for it -- confirm that `import scipy.stats` has run before this point.
dist = scipy.stats.beta(2, 8)
num_samples = 10_000_000
# `create_data` comes from clf_metrics_score_hist_support.py; it returns the
# labels first and the sampled scores second.
yt, yp = create_data(dist, n=num_samples, random_state=42)

## clf_metrics_score_hist.py
from typing import List, Optional, Tuple, Union
import numpy as np
import pandas as pd
import scipy


def binned_cm_stats(histogram: Union[np.ndarray, List[int]],
                    bins: Union[np.ndarray, List[float]]) -> pd.DataFrame:
    """Produce confusion matrix statistics for a histogram of probability estimates.

## no_joint_during_sampling.py
# ----------------------------------------------------------------------------
#  R. M. Deak
#
#  Illustrates the problem of trying to estimate a joint distribution without
#  the ability to probe points, but instead only lines.  This means that
#  instead of updating the density at particular (point) locations, it is
#  updated along an entire axis, given a coordinate along the other axis.  In
#  the 2x2 example, the result is that each cell in the joint is an average
#  of itself and its neighbors.
#

## lazy_batched_apply.py
# pip install more_itertools numpy   (itertools ships with the standard library)

from typing import Callable, Iterable, Iterator, List, TypeVar, Union
import itertools
import more_itertools

A = TypeVar("A")
B = TypeVar("B")

def lazy_batched_apply(

## LightGBM_3.x.x_partial_fitting.py
# ===========================================================================
#  LightGBM 3.0.0: Using init_model parameter
# ===========================================================================
#
# INSTALL:   pip install lightgbm==3.0.0rc1
#
# The goal is to show the initial steps of how to integrate LightGBM and
# Ray Tune.

from copy import copy

## milfp_1_4.py
# vvvvvvvvvvvvvvvvvvvvvvvvv  Binary Problem 3 def  vvvvvvvvvvvvvvvvvvvvvvvvv
# Decision variables: one non-negative continuous `t`, plus one non-negative
# continuous `y{i}` and one binary `z{i}` per country.
# `pulp`, `num_countries`, `dot`, `runners` and `qs` are defined outside this
# snippet.
t = pulp.LpVariable('t', lowBound=0, cat='Continuous')
y = [
    pulp.LpVariable(f'y{i}', lowBound=0, cat='Continuous')
    for i in range(num_countries)
]
z = [ pulp.LpVariable(f'z{i}', cat='Binary') for i in range(num_countries) ]

# Maximization problem; the expression added below is the objective.
# NOTE(review): the problem name contains spaces -- check whether the installed
# PuLP version warns about or rewrites such names.
model = pulp.LpProblem("binary best countries", pulp.LpMaximize)
model += dot(runners * qs, y)                # obj (numerator of orig prob)
	# Requirements: conda create -n dev python=3.7 numpy

	from typing import Callable, Optional, Tuple, TypeVar
	import numpy as np

	# Generic type variable with no bound and no constraints.
	Y = TypeVar("Y")

	# This could be a np.recarray. See examples in compressed docstr.
	# The bound means the variable stands for np.ndarray or any subclass of it.
	NpArray = TypeVar("NpArray", bound=np.ndarray)
	# Pass the scores `yp` to `cm_stats_by_threshold_binned`, then feed its
	# result to `CmStatsBinned.auc` and `CmStatsBinned.ap`.  All of these names
	# are defined outside this snippet.
	# NOTE(review): the trailing numbers look like values from one of the
	# author's runs -- confirm before relying on them.
	cm = cm_stats_by_threshold_binned(yp)
	auc = CmStatsBinned.auc(cm) # 0.708
	ap = CmStatsBinned.ap(cm) # 0.372
	import matplotlib.pyplot as plt
	import numpy as np


	def create_data(dist, n=1000, random_state=None):
	    """Sample scores from ``dist`` and draw one binary label per score.

	    Args:
	        dist: Object exposing ``rvs(size=...)``, e.g. a frozen
	            ``scipy.stats`` distribution.
	        n: Number of samples to draw.
	        random_state: Seed handed to ``np.random.seed``.  Note that this
	            reseeds NumPy's global legacy generator.

	    Returns:
	        Tuple ``(yt, yp)``: the sampled 0/1 labels and the sampled scores.
	    """
	    np.random.seed(random_state)
	    yp = dist.rvs(size=n)
	    # Noise is currently disabled (all zeros); np.random.normal(0, 0.05, n)
	    # can be substituted to perturb the scores before labelling.
	    noise = np.zeros(n)
	    # One Bernoulli trial per sample, with the score clipped into [0, 1]
	    # used as the success probability.
	    yt = np.random.binomial(1, np.clip(yp + noise, 0, 1))
	    return yt, yp
	import sklearn.metrics

	# Reference metrics computed by scikit-learn from the labels `yt` and the
	# scores `yp` (both defined outside this snippet).  The third value
	# returned by each curve function is discarded.
	sk_pre, sk_rec, _ = sklearn.metrics.precision_recall_curve(yt, yp)
	sk_fpr, sk_tpr, _ = sklearn.metrics.roc_curve(yt, yp)
	sk_ap = sklearn.metrics.average_precision_score(yt, yp)
	sk_auc = sklearn.metrics.roc_auc_score(yt, yp)
	# The `0.3` format spec prints each metric with three significant digits.
	print(f"AUC: {sk_auc:0.3} AP: {sk_ap:0.3}")
	# Create a label distribution and sample "predictions" and "ground truth"
	# NOTE(review): `scipy.stats` is used here before any visible import of it
	# -- confirm that `import scipy.stats` has run before this point.
	dist = scipy.stats.beta(2, 8)
	num_samples = 10_000_000
	# `create_data` returns the labels first and the sampled scores second.
	yt, yp = create_data(dist, n=num_samples, random_state=42)
	from typing import List, Optional, Tuple, Union
	import numpy as np
	import pandas as pd
	import scipy


	def binned_cm_stats(histogram: Union[np.ndarray, List[int]],
	bins: Union[np.ndarray, List[float]]) -> pd.DataFrame:
	"""Produce confusion matrix statistics for a histogram of probability estimates.
	# ----------------------------------------------------------------------------
	# R. M. Deak
	#
	# Illustrates the problem of trying to estimate a joint distribution without
	# the ability to probe points, but instead only lines. This means that
	# instead of updating the density at particular (point) locations, it is
	# updated along an entire axis, given a coordinate along the other axis. In
	# the 2x2 example, the result is that each cell in the joint is an average
	# of itself and its neighbors.
	#
	# pip install more_itertools numpy  (itertools ships with the standard library)

	from typing import Callable, Iterable, Iterator, List, TypeVar, Union
	import itertools
	import more_itertools

	A = TypeVar("A")
	B = TypeVar("B")

	def lazy_batched_apply(
	# ===========================================================================
	# LightGBM 3.0.0: Using init_model parameter
	# ===========================================================================
	#
	# INSTALL: pip install lightgbm==3.0.0rc1
	#
	# The goal is to show the initial steps of how to integrate LightGBM and
	# Ray Tune.

	from copy import copy
	# vvvvvvvvvvvvvvvvvvvvvvvvv Binary Problem 3 def vvvvvvvvvvvvvvvvvvvvvvvvv
	# Decision variables: one non-negative continuous `t`, plus one non-negative
	# continuous `y{i}` and one binary `z{i}` per country.
	# `pulp`, `num_countries`, `dot`, `runners` and `qs` are defined outside
	# this snippet.
	t = pulp.LpVariable('t', lowBound=0, cat='Continuous')
	y = [
	pulp.LpVariable(f'y{i}', lowBound=0, cat='Continuous')
	for i in range(num_countries)
	]
	z = [ pulp.LpVariable(f'z{i}', cat='Binary') for i in range(num_countries) ]

	# Maximization problem; the expression added below is the objective.
	# NOTE(review): the problem name contains spaces -- check whether the
	# installed PuLP version warns about or rewrites such names.
	model = pulp.LpProblem("binary best countries", pulp.LpMaximize)
	model += dot(runners * qs, y) # obj (numerator of orig prob)