Skip to content

Instantly share code, notes, and snippets.

View shawlu95's full-sized avatar

Shaw Lu shawlu95

View GitHub Profile
@shawlu95
shawlu95 / 01_confidence_interval_load.py
Created April 7, 2019 22:17
01_confidence_interval_load
import numpy as np
engagement = np.loadtxt('data/engagement.csv')
mean = np.mean(engagement)
std = np.std(engagement)
print("""
Population mean: %.5f
Population std: %.5f
Population size: %i
@shawlu95
shawlu95 / 01_confidence_interval_sampling_dist.py
Created April 7, 2019 22:18
01_confidence_interval_sampling_dist.py
sample_size = 300
n_trials = 1000000
# Draw one million samples, each of size 300, in a single vectorized call.
# np.random.choice samples with replacement by default, so asking for an
# (n_trials, sample_size) array yields the same kind of draws as calling it
# once per trial, without a million Python-level calls or the temporary
# list of arrays that then had to be copied into one big array.
samples = np.random.choice(engagement, size=(n_trials, sample_size))
# calculate sample mean for each sample (one mean per row)
means = samples.mean(axis=1)
@shawlu95
shawlu95 / 01_confidence_interval_fpr.py
Created April 7, 2019 22:20
01_confidence_interval_fpr
# Build a 95% confidence interval around every sample mean and measure how
# often the interval does / does not contain the population mean.
z = 1.96
# standard error of each sample mean: per-row std over sqrt(sample size)
se = np.std(samples, axis=1) / np.sqrt(sample_size)
half_width = z * se
los, ups = means - half_width, means + half_width
# fraction of intervals that cover the population mean
success = ((los <= mean) & (ups >= mean)).mean()
# fraction of intervals that miss the population mean on either side
fpr = ((los > mean) | (ups < mean)).mean()
@shawlu95
shawlu95 / 02_visualizing_beta.py
Created April 7, 2019 22:26
02_visualizing_beta
import numpy as np
from scipy.stats import beta
# Shape parameters of the two beta distributions to compare.
a1, b1 = 10, 10
a2, b2 = 3, 3
# Frozen distributions built from those parameters.
rv1, rv2 = beta(a1, b1), beta(a2, b2)
# 100 evenly spaced points covering the support [0, 1].
x = np.linspace(0.0, 1.0, num=100)
class Arm(object):
"""
Each arm's true click through rate is
modeled by a beta distribution.
"""
def __init__(self, idx, a=1, b=1):
"""
Init with uniform prior.
"""
self.idx = idx
def monte_carlo_simulation(arms, draw=100):
"""
Monte Carlo simulation of thetas. Each arm's click through
rate follows a beta distribution.
Parameters
----------
arms list[Arm]: list of Arm objects.
draw int: number of draws in Monte Carlo simulation.
def thompson_sampling(arms):
"""
Stochastic sampling: take one draw for each arm and
divert traffic to the arm with the best draw.
@param arms list[Arm]: list of Arm objects
@return idx int: index of winning arm from sample
"""
sample_p = [arm.draw_ctr() for arm in arms]
idx = np.argmax(sample_p)
def should_terminate(p_winner, est_ctrs, mc, alpha=0.05):
"""
Decide whether the experiment should terminate, i.e. whether the value remaining
in the experiment is less than 1% of the winning arm's click through rate.
Parameters
----------
p_winner list[float]: probability of each arm being the winner.
est_ctrs list[float]: estimated click through rates.
mc np.matrix: Monte Carlo matrix of dimension (draw, n_arms).
def k_arm_bandit(ctrs, alpha=0.05, burn_in=1000, max_iter=100000, draw=100, silent=False):
"""
Perform stochastic k-arm bandit test. The experiment is terminated when the
value remaining in the experiment drops below a certain threshold.
Parameters
----------
ctrs list[float]: true click through rate of each arm.
alpha float: terminate experiment when the (1 - alpha)th percentile
of the remaining value is less than 1% of the winner's click through rate.
@shawlu95
shawlu95 / 04_power.py
Created April 9, 2019 06:35
04_power
def critical_z(alpha=0.05, tail="two"):
    """
    Return the critical z value for a given significance level.

    Parameters
    ----------
    alpha float: significance level of the test.
    tail str: "two" for a two-tailed test; any other value is treated
        as a one-tailed test.

    Returns
    -------
    The standard normal quantile at 1 - alpha / 2 (two-tailed) or
    1 - alpha (one-tailed).
    """
    # A two-tailed test splits alpha evenly between both tails.
    tail_area = alpha / 2 if tail == "two" else alpha
    return norm.ppf(1 - tail_area)