deaktator / milfp_1_3.py
Last active August 5, 2019 04:19
PuLP definition of the linear program after applying the Charnes-Cooper transformation
import numpy as np
import pulp  # https://pythonhosted.org/PuLP/

# vvvvvvvvvvvvvvvvvvvvvvvvv convenience methods vvvvvvvvvvvvvvvvvvvvvvvvv
def dot(coefficients, variables):
    # Dot product of constant coefficients with PuLP decision variables.
    return pulp.LpAffineExpression(zip(variables, coefficients))

def print_variables(t, y):
    print(f't: {t.varValue}')
    for i in range(len(y)):
        print(f'y[{i}]: {y[i].varValue}')  # loop body assumed; preview cuts off here
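
For context, a minimal sketch of how the transformed program could be assembled with these helpers. The data (A, b, c, d) and the exact constraint form are my assumptions, not the gist's: maximizing (c.x)/(d.x) subject to A.x <= b, x >= 0 becomes, with y = t*x and t = 1/(d.x), maximizing c.y subject to A.y <= b*t, d.y = 1, t >= 0.

# Hypothetical problem data (not from the gist).
c = [3.0, 1.0]    # numerator coefficients
d = [1.0, 2.0]    # denominator coefficients
A = [[1.0, 1.0]]  # constraint matrix
b = [10.0]        # constraint bounds

prob = pulp.LpProblem('charnes_cooper', pulp.LpMaximize)
t = pulp.LpVariable('t', lowBound=0)
y = [pulp.LpVariable(f'y_{i}', lowBound=0) for i in range(len(c))]

prob += dot(c, y)                   # transformed (now linear) objective
prob += dot(d, y) == 1              # Charnes-Cooper normalization constraint
for row, b_i in zip(A, b):
    prob += dot(row, y) <= b_i * t  # original constraints, scaled by t

prob.solve()
print_variables(t, y)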
deaktator / milfp_1_2.py
Created August 5, 2019 03:52
Brute force search of all possible solutions
import numpy as np
import itertools
import pulp  # https://pythonhosted.org/PuLP/

def brute_force_best(
        runners_per_country,
        qs,
        max_countries,
        min_runners):
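
The preview ends at the signature. One plausible shape for such a search (a sketch under my own assumptions; `objective` stands in for the gist's actual scoring, which isn't visible here):

def brute_force_best_sketch(runners_per_country, objective,
                            max_countries, min_runners):
    # Enumerate every per-country choice of 0..runners_per_country[i]
    # runners, keep the feasible ones, and return the best under objective.
    best, best_score = None, float('-inf')
    for choice in itertools.product(*(range(c + 1) for c in runners_per_country)):
        countries_used = sum(1 for x in choice if x > 0)
        if countries_used > max_countries or sum(choice) < min_runners:
            continue  # violates the selection constraints
        score = objective(choice)
        if score > best_score:
            best, best_score = choice, score
    return best, best_score

# e.g. brute_force_best_sketch([2, 3], sum, max_countries=1, min_runners=2)
# => ((0, 3), 3)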
deaktator / milfp_1_1.py
Created August 5, 2019 03:49
Objective function for running grouped races where only the top n finishers are rewarded
import numpy as np
import itertools
import pulp  # https://pythonhosted.org/PuLP/

def count_to_ind_lookup(counts):
    # Maps each item's flat index to the index of the group it belongs to.
    lookup = {}
    ind = 0
    for i, c in enumerate(counts):
        lookup.update(dict(zip(range(ind, ind + c), [i] * c)))
        ind += c
    return lookup  # return assumed; preview cuts off here
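
For example (computed from the function above), two groups of sizes 2 and 3 give:

count_to_ind_lookup([2, 3])  # => {0: 0, 1: 0, 2: 1, 3: 1, 4: 1}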
deaktator / metrics_for_calibrated_classifiers.py
Created May 24, 2019 03:44
Experiments showing metrics for calibrated classifiers (w/o needing ground truth)
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import beta, uniform
from sklearn.metrics import (
    f1_score,
    accuracy_score,
    precision_score,
    recall_score,
    average_precision_score
)
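
The experiments themselves are cut off above, but the idea the description refers to can be sketched (my own minimal version, not the gist's code): if the scores p are calibrated, treating each unknown label as Bernoulli(p_i) yields expected confusion-matrix counts, and therefore expected metrics, without ground-truth labels.

def expected_metrics(p, threshold=0.5):
    # p: NumPy array of calibrated P(y=1) scores; assume y_i ~ Bernoulli(p_i).
    pred = p >= threshold
    tp = np.sum(p[pred])        # E[# true positives]
    fp = np.sum(1 - p[pred])    # E[# false positives]
    fn = np.sum(p[~pred])       # E[# false negatives]
    tn = np.sum(1 - p[~pred])   # E[# true negatives]
    accuracy = (tp + tn) / len(p)
    precision = tp / (tp + fp) if tp + fp > 0 else 0.0
    recall = tp / (tp + fn) if tp + fn > 0 else 0.0
    return accuracy, precision, recall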
deaktator / const_approx_4.scala
Created March 19, 2019 07:38
Final constant-time approximation of const_approx_1.scala
def something(n: Long, k: Long): Double =
  1 - (n - k) / k.toDouble * math.log((n + 0.5) / (n - k + 0.5))
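
Where the logarithm comes from (my derivation; the gists don't show it): const_approx_1 below averages the terms ki/ni, which equal i/(n-k+i); rewriting each term as 1 - (n-k)/(n-k+i) turns the sum into a harmonic tail, and a midpoint approximation of that tail gives the log:

\frac{1}{k}\sum_{i=1}^{k}\frac{i}{n-k+i}
  = 1 - \frac{n-k}{k}\sum_{m=n-k+1}^{n}\frac{1}{m}
  \approx 1 - \frac{n-k}{k}\,\ln\frac{n+\tfrac{1}{2}}{n-k+\tfrac{1}{2}}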
deaktator / const_approx_3.scala
Created March 19, 2019 07:37
Further simplified version of const_approx_2.scala
def something(n: Long, k: Long): BigDecimal = {
  def h(i: Long, total: BigDecimal): BigDecimal = {
    if (k < i) total
    else {
      val p = BigDecimal.exact(i) / (n - k + i)
      val ap = p / k
      h(i + 1, total + ap)
    }
  }
  h(1, BigDecimal.exact(0))
}
deaktator / const_approx_2.scala
Created March 19, 2019 07:35
Intermediate simplification of const_approx_1.scala
def something(n: Long, k: Long): BigDecimal = {
  // For convenience.
  type BD = BigDecimal
  def bd(v: Long) = BigDecimal.exact(v)
  def denom(n: Long, k: Long, i: Long) = n - k + i  // assumed: not shown in the preview

  @scala.annotation.tailrec             // OLD VERSION:
  def h(i: Long, total: BD): BD = {     // def h(ni: Long, ki: Long, total: BD) = {
    if (i <= 0) total                   //   if (ki <= 0) total
    else {                              //   else {
      val p = bd(i) / denom(n, k, i)    //     val p = bd(ki) / ni
      // Continuation assumed (preview cuts off), mirroring const_approx_1/_3:
      val ap = p / k                    //     val ap = p / k
      h(i - 1, total + ap)              //     h(ni - 1, ki - 1, total + ap)
    }
  }
  h(k, bd(0))
}
deaktator / const_approx_1.scala
Created March 19, 2019 07:33
Original function to be approximated
def something(n: Long, k: Long): BigDecimal = {
  @scala.annotation.tailrec
  def h(ni: Long, ki: Long, total: BigDecimal): BigDecimal = {
    if (ki <= 0) total
    else {
      val p = BigDecimal.exact(ki) / ni
      val ap = p / k
      h(ni - 1, ki - 1, total + ap)
    }
  }
  h(n, k, BigDecimal.exact(0))  // assumed initial call; preview cuts off here
}
deaktator / wt_cv_eval_is_diff_unwt_cv.py
Last active March 11, 2019 03:57
Shows that sklearn cross validation with unweighted metrics can return different optimal parameter values than cross validation that passes sample weights to the scoring function.
# ============================================================================
# R.M. Deak wt_cv_eval_is_diff_unwt_cv.py
#
# Runs scikit-learn's cross validation with GridSearchCV and shows that
# different optimal parameter values may be returned by GridSearchCV than
# when using cross validation with sample_weights passed to the scoring
# function.
# ============================================================================
from typing import NamedTuple
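
The rest of the script is cut off above. A minimal sketch of the comparison it describes, under my own assumptions (the data, weights, and grid below are invented): GridSearchCV scored without weights may prefer a different C than a manual cross validation whose scorer receives sample_weight.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, KFold

X, y = make_classification(n_samples=500, random_state=0)
w = np.where(y == 1, 10.0, 1.0)  # hypothetical weights: up-weight positives
grid = {'C': [0.01, 1.0, 100.0]}

# Unweighted: GridSearchCV's default accuracy scorer ignores sample weights.
unweighted = GridSearchCV(LogisticRegression(max_iter=1000), grid, cv=5).fit(X, y)

# Weighted: score each fold with sample_weight and pick the best C manually.
def weighted_cv_acc(C):
    scores = []
    for tr, te in KFold(n_splits=5).split(X):
        model = LogisticRegression(C=C, max_iter=1000).fit(X[tr], y[tr])
        scores.append(accuracy_score(y[te], model.predict(X[te]),
                                     sample_weight=w[te]))
    return np.mean(scores)

best_weighted_C = max(grid['C'], key=weighted_cv_acc)
print(unweighted.best_params_['C'], best_weighted_C)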
deaktator / hypergeom_sklearn_imp_wt.py
Created March 5, 2019 03:00
Using the hypergeometric distribution to model the expected accuracy in cross validation.
from scipy.stats import hypergeom
# Run the following to see the graph:
# for i in range(1, 400):
#     print(f"{i}\t{exp_test_acc(400, 201, i)}")
#
def exp_test_acc(pop_size, pos_in_pop, fold1_size):
    fold2_size = pop_size - fold1_size
    E_acc = 0.0
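
The preview ends mid-function. A sketch of one way such an expectation could be computed; the majority-class prediction model here is my assumption, not necessarily the gist's:

from scipy.stats import hypergeom

def exp_test_acc_sketch(pop_size, pos_in_pop, fold1_size):
    # E[test accuracy] when the number of positives drawn into fold 1 is
    # hypergeometric and fold 1's majority class is predicted on fold 2.
    fold2_size = pop_size - fold1_size
    rv = hypergeom(pop_size, pos_in_pop, fold1_size)
    E_acc = 0.0
    for k in range(max(0, pos_in_pop - fold2_size),
                   min(fold1_size, pos_in_pop) + 1):
        pos2 = pos_in_pop - k    # positives remaining for fold 2
        if 2 * k >= fold1_size:  # fold-1 majority class: positive
            acc = pos2 / fold2_size
        else:                    # fold-1 majority class: negative
            acc = (fold2_size - pos2) / fold2_size
        E_acc += rv.pmf(k) * acc
    return E_acc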