Skip to content

Instantly share code, notes, and snippets.

View dmh43's full-sized avatar

Dany Haddad dmh43

View GitHub Profile
@dmh43
dmh43 / ratio_ci.py
Created October 25, 2022 19:43
ci coverage of percent change in ctr
import numpy as np
import numpy.linalg as la
import pandas as pd
import numpy.random as rn
import matplotlib.pyplot as plt
import matplotlib
def calc_coverage(var, est, val):
ci_l = est - 1.96 * np.sqrt(var)
ci_r = est + 1.96 * np.sqrt(var)
@dmh43
dmh43 / ranking_metrics.py
Last active July 21, 2021 16:07
ranking metrics that account for imperfect recall of first pass ranker
def calc_ndcg(a, b, num_rel):
    """Compute NDCG for a ranking that may be missing some relevant items.

    The ranking is padded with the relevant items it failed to include:
    each missing item gets relevance 1 and a score strictly below the
    lowest observed score, so it sorts after everything that was ranked.
    NDCG is then evaluated at the original ranking length.

    Args:
        a: NumPy array of relevance labels for the ranked items (passed to
            ``ndcg_score`` as the true relevance).
        b: NumPy array of scores for the same items, aligned with ``a``.
        num_rel: Total number of relevant items, including those absent
            from the ranking.

    Returns:
        The value returned by ``ndcg_score`` when the padded labels are
        neither all zero nor all one; otherwise ``np.nan``.

    Raises:
        ValueError: If ``a`` sums to more than ``num_rel``.

    Note:
        ``ndcg_score`` is not defined in this block and must already be in
        scope (presumably ``sklearn.metrics.ndcg_score`` -- confirm against
        the file's imports).
    """
    ranking_len = len(a)
    len_diff = int(num_rel - a.sum())
    if len_diff < 0:
        # Without this check np.ones(len_diff) fails with an opaque
        # "negative dimensions" ValueError.
        raise ValueError(
            "num_rel is smaller than the number of relevant items in a"
        )

    # Append the relevant items the ranking missed, scored below every
    # observed score.
    a = np.r_[a, np.ones(len_diff)]
    b = np.r_[b, (b.min() - 1) * np.ones(len_diff)]

    # Only call ndcg_score when the padded labels contain both classes.
    mean_rel = a.mean()
    if 0 < mean_rel < 1:
        return ndcg_score([a], [b], k=ranking_len)
    return np.nan
def calc_auc(a, b, num_rel, population_size):
assert a.sum() <= num_rel
@dmh43
dmh43 / auc_pos.py
Last active October 4, 2021 15:29
Estimating AUC using only positive and unknown examples
import numpy as np
from sklearn.metrics import roc_auc_score
# Ground-truth labels: the first 10000 entries are 1, the last 10000 are 0.
y_true = np.r_[np.ones(10000), np.zeros(10000)]
# Probability that an example is flagged in y_known: alpha is applied to the
# first 10000 entries (true label 1), beta to the last 10000 (true label 0).
alpha = 0.5
beta = 0.2
# np.float was only an alias for the builtin float; it was deprecated in
# NumPy 1.20 and removed in 1.24, so the builtin is used directly.
y_known = np.r_[(np.random.rand(10000) < alpha).astype(float),
                (np.random.rand(10000) < beta).astype(float)]
# rankings = [np.random.permutation(2000) for i in range(100)]
# 100 score vectors: a strictly decreasing base score plus Gaussian noise
# whose standard deviation is 100 * i, so index 0 is noise-free.
rankings = [-np.arange(20000) + 100 *np.random.randn(20000) * i for i in range(100)]
@dmh43
dmh43 / f1_var.py
Last active January 16, 2022 20:18
Simulation of variance of estimate of F1 as precision and recall vary from 0 to 1
from collections import defaultdict
import numpy as np
import numpy.linalg as la
import pandas as pd
import numpy.random as rn
import matplotlib.pyplot as plt
def hmean(a, b):
@dmh43
dmh43 / config_startup.py
Created November 25, 2019 19:40
Python config and startup
# in ~/.ipython/profile_default/startup/startup.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from itertools import *
from collections import *
from operator import itemgetter
# in ~/.ipython/profile_default/python_config.py
# NOTE(review): get_config is neither defined nor imported in this snippet --
# presumably IPython provides it when loading the profile config file; confirm.
c = get_config()
@dmh43
dmh43 / stream_of_paths.py
Created September 13, 2019 14:18
yield a stream of paths from start to end on a grid
def locs_available_here(d, h, cur):
    """Yield the grid locations reachable in one step from ``cur``.

    A step increases exactly one coordinate by one. The step along the first
    coordinate is yielded before the step along the second.

    Args:
        d: Size of the grid along the first coordinate; valid indices are
            ``0 .. d - 1``.
        h: Size of the grid along the second coordinate; valid indices are
            ``0 .. h - 1``.
        cur: Current location; only ``cur[0]`` and ``cur[1]`` are read.

    Yields:
        ``(cur[0] + 1, cur[1])`` if that stays inside the grid, then
        ``(cur[0], cur[1] + 1)`` if that stays inside the grid.
    """
    first, second = cur[0], cur[1]
    if first < d - 1:
        yield (first + 1, second)
    if second < h - 1:
        yield (first, second + 1)
def new_paths(d, h, path_so_far, to_loc):
end_loc = (d - 1, h - 1)
if to_loc == end_loc:
yield path_so_far + [end_loc]
else:
for new_loc in locs_available_here(d, h, to_loc):

Fair Ranking Extensions

ideas

  • Two sided marketplace extension
    • Recommendation system with fairness to suppliers with inverse-popularity score (like in our presentation)
  • Feldman or Celis et al. methods of enforcing fairness constraints
    • Show that problem is exponential in number of protected attributes
    • Use approximation by being fair for each attribute and then performing rank aggregation

notes

  • Can't have a ranking that has all of the following properties (Arrow 1950):
    • Non-dictatorship

Three.JS Cubes

@dmh43
dmh43 / company.json
Created April 28, 2017 17:50
Fetching company 35416 for agent 5128
{
"branchID": 38467,
"branchLogo": null,
"branchName": null,
"ccrAgentID": 337853,
"ccrEmail": "dibarra@xceligent.com",
"ccrFirstName": "Dustin",
"ccrLastName": "Ibarra",
"ccrPhone": "(816) 303-2868",
"csrAgentID": 223349,
@dmh43
dmh43 / agent.json
Created April 28, 2017 17:49
Agent 5128 for 315 N Lindenwood Dr, Olathe, Kansas
{
"addressList": [
{
"address1": "4520 Main St",
"address2": "Suite 1000",
"addressID": null,
"addressType": null,
"city": "Kansas City",
"country": "United States",
"isActive": true,