Created
November 30, 2016 17:14
-
-
Save IndianBoy42/bb1f47a6978c8d843117a62c21cd9e11 to your computer and use it in GitHub Desktop.
Some random probability model stuff coded in Python+Numpy (+Matplotlib+itertools)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import random | |
import matplotlib.pyplot as plt | |
import itertools | |
import simpsons_paradox_data as simp_para_data | |
def run(): | |
print(bernoulli(0.6)) | |
def bernoulli(p=0.5, a=1, b=0): | |
return {a: p, b:1-p} | |
def binomial(p=0.5, n=1, a=1, b=0): | |
bern = bernoulli(p=p, a=a, b=b) | |
bino = {} | |
for i in range(0, n+1): | |
c = bern[a] ** i * bern[b] ** (n-1) | |
d = factorial(10) / (factorial(i), factorial(n-i)) | |
bin[n] = c / d | |
return bin | |
def factorial(n): | |
a = 1 | |
for i in range(1, n+1): | |
a = a * i | |
return a | |
def var_range(X, m): | |
return list(set([X[k] for k in m])) | |
def var_pmf(X, m): | |
pmf = {} | |
for (k, v) in X.items(): | |
try: | |
pmf[v] += m[k] | |
except: | |
pmf[v] = m[k] | |
return pmf | |
def combine2(m1, m2=None): | |
m2 = m2 if m2 != None else m1 | |
return dict([((d1, d2), p1 * p2) for (d1, p1) in m1.items() for (d2, p2) in m2.items()]) | |
def combine3(m1, m2=None, m3=None): | |
m2 = m2 if m2 != None else m1 | |
m3 = m3 if m3 != None else m2 | |
return dict([((d1, d2, d3), p1 * p2 * p3) for (d1, p1) in m1.items() for (d2, p2) in m2.items() for (d3, p3) in m3.items()]) | |
def marginalize2(m, kc=1): | |
m2 = {} | |
for ((k1, k2), v) in m.items(): | |
k = k1 if kc == 2 else k2 | |
try: | |
m2[k] += v | |
except: | |
m2[k] = v | |
return m2 | |
def marginalize3(m, kc=1): | |
m2 = {} | |
for ((k1, k2, k3), v) in m.items(): | |
k = ((k2, k3) if kc == 1 else (k1, k3) if kc == 2 else (k1, k2)) | |
try: | |
m2[k] += v | |
except: | |
m2[k] = v | |
return m2 | |
def condition2(m, o, kc=1): | |
m1 = dict(((k2 if kc == 1 else k1), v) for ((k1, k2), v) in m.items() if (k1 if kc == 1 else k2) == o) | |
norm = sum(m1.values()) | |
return dict((k, v/norm) for (k, v) in m1.items()) | |
def condition3(m, o, kc=1): | |
m1 = dict((((k2, k3) if kc == 1 else (k1, k3) if kc == 2 else (k1, k2)), v) for ((k1, k2, k3), v) in m.items() if (k1 if kc == 1 else k2 if kc == 2 else k3) == o) | |
norm = sum(m1.values()) | |
return dict((k, v/norm) for (k, v) in m1.items()) | |
def condition3o(m, o1, o2, kc1=1, kc2=2): | |
kc = 6-kc1-kc2 | |
m1 = dict((k1 if kc==1 else k2 if kc==2 else k3, v) for ((k1, k2, k3), v) in m.items() if (k1 if kc1 == 1 else k2 if kc1 == 2 else k3) == o1 if (k1 if kc2 == 1 else k2 if kc2 == 2 else k3) == o2) | |
norm = sum(m1.values()) | |
return dict((k, v/norm) for (k, v) in m1.items()) | |
def sum_models2(m1, m2=None): | |
m2 = m2 if m2 != None else m1 | |
mc12 = combine2(m1, m2) | |
m12 = {} | |
for ((d1, d2), p) in mc12.items(): | |
try: | |
m12[d1+d2] += p | |
except: | |
m12[d1+d2] = p | |
return m12 | |
def condition(m, s): | |
ms = dict((k, v) for (k, v) in m.items() if k in s) | |
norm = sum(ms.values()); | |
return dict((k, v/norm) for (k, v) in ms.items()) | |
def condition_where(m, sf): | |
ms = dict((k, v) for (k, v) in m.items() if sf(k)) | |
norm = sum(ms.values()); | |
return dict((k, v/norm) for (k, v) in ms.items()) | |
def sum_models3(m1, m2=None, m3=None): | |
m2 = m2 if m2 != None else m1 | |
m3 = m3 if m3 != None else m2 | |
mc12 = combine3(m1, m2, m3) | |
m12 = {} | |
for ((d1, d2, d3), p) in mc12.items(): | |
try: | |
m12[d1+d2+d3] += p | |
except: | |
m12[d1+d2+d3] = p | |
return m12 | |
def model_freq(m, n=1000): | |
return [k for ks in [[e]*int(p*n) for (e,p) in m.items()] for k in ks] | |
def sim_model_once(m): | |
o, p = zip(*list(m.items())) | |
return np.random.choice(o, p=p) | |
def sim_choice(m, M=None): | |
c = sim_model_once(m) | |
return c if M==None else M[c] | |
def sim_model(m, n=1): | |
for i in range(n): | |
yield sim_model_once(m) | |
def count_sim(sim, e, p=0): | |
ec = 0 | |
cf = [] | |
frac = [] | |
for i in range(len(sim)): | |
ec += 1 if sim[i] == e else 0 | |
cf += [ec] | |
frac += [ec/(i+1) - p] | |
return (ec, cf, frac) | |
def count_sim_union(sim, es, p=0, m=None): | |
p = p if m==None else p_event(es, m) | |
ec = 0 | |
cf = [] | |
frac = [] | |
for i in range(len(sim)): | |
ec += 1 if sim[i] in es else 0 | |
cf += [ec] | |
frac += [ec/(i+1) - p] | |
return (ec, cf, frac) | |
def count_sim_gen(sim, f, p=0): | |
ec = 0 | |
cf = [] | |
frac = [] | |
for i in range(len(sim)): | |
ec += 1 if f(i) else 0 | |
cf += [ec] | |
frac += [ec/(i+1) - p] | |
return (ec, cf, frac) | |
def count_all_sim(sim, m, normalise=False): | |
mc = [] | |
mcf = [] | |
mfrac = [] | |
for (i, p) in m.items(): | |
(mic, micf, mifr) = count_sim(sim, i, p = p if normalise else 0) | |
mc += [mic] | |
mcf += [micf] | |
mfrac += [mifr] | |
return (mc, mcf, mfrac) | |
def event_not(no, m): | |
return [o for o in m if o != no] | |
def event_n(e1, e2): | |
return list(set(e1) & set(e2)) | |
def event_u(e1, e2): | |
return list(set(e1) | set(e2)) | |
def event_c(e, omega=None, m=None): | |
return ([] if omega==None and m==None else list(set(omega if m==None else m.keys()) - set(e))) | |
def p_event(e, m): | |
return sum([p for (o, p) in m.items() if o in e]) | |
def p_event_where(ef, m): | |
return sum([p for (o, p) in m.items() if ef(o)]) | |
def event_where(ef, m): | |
return [(o, p) for (o, p) in m.items() if ef(o)] | |
def all_events(m, l=None): | |
if l==None: | |
return [combi for l in range(len(m.keys())+1) for combi in itertools.combinations(m.keys(), l)] | |
else: | |
return itertools.combinations(m.keys(), l) | |
def plot_fracs(fracs, figsize=None, xlabel="Number of simulations", ylabel="Frequency of event"): | |
plt.figure(figsize=figsize) | |
plt.plot(range(1, len(fracs)+1), fracs) | |
plt.xlabel(xlabel) | |
plt.ylabel(ylabel) | |
def plot_all_fracs(fracss, figsize=None, xlabel="Number of simulations", ylabel="Frequency of event"): | |
plt.figure(figsize=figsize) | |
plt.xlabel(xlabel) | |
plt.ylabel(ylabel) | |
for fracs in fracss: | |
plt.plot(range(1, len(fracs)+1), fracs) | |
def ndice(n=6): | |
return dict([(i, 1/n) for i in range(1,n+1)]) | |
def main(): | |
d6 = ndice() | |
d20 = ndice(20) | |
two_dice = combine2(d6, d6) | |
stwo_dice = sum_models2(d6, d6) | |
print(marginalize2(two_dice, kc=1)) | |
print(condition2(two_dice, 2)) | |
print(set([(d1, d2) for (d1, d2) in two_dice if d1+d2 == 7])) | |
print("") | |
weather = { | |
('sunny', 'hot'):3/10, | |
('rainy', 'hot'):1/30, | |
('snowy', 'hot'):0, | |
('sunny', 'cold'):1/5, | |
('rainy', 'cold'):2/15, | |
('snowy', 'cold'):1/3, | |
} | |
print(weather) | |
print(marginalize2(weather, kc=1)) | |
print(marginalize2(weather, kc=2)) | |
print(condition2(weather, 'cold', kc=2)) | |
print(condition_where(d6, lambda x: x%2==0)) | |
print(p_event_where(lambda x: x[1]==3 and x[0]%2==0, two_dice)) | |
print(p_event_where(lambda x: x[1]==3 or x[0]%2==0, two_dice)) | |
print(condition_where(combine(d6), lambda x: x[1]==3)) | |
print(p_event_where(lambda x: x[0]%2==0, condition_where(combine(d6), lambda x: x[1]==3))) | |
def simpson(): | |
simpsons_paradox = simp_para_data.prob_space | |
print(simpsons_paradox) | |
print(simpsons_paradox[('female', 'C', 'admitted')]) | |
print("") | |
gender_bias = marginalize3(simpsons_paradox, kc=2) | |
print(gender_bias) | |
print(condition2(gender_bias, 'female', kc=1)) | |
print(condition2(gender_bias, 'male', kc=1)) | |
print("") | |
print(condition3o(simpsons_paradox, 'female', 'F', kc1=1, kc2=2)) | |
print(condition3o(simpsons_paradox, 'male', 'F', kc1=1, kc2=2)) | |
if __name__ == "__main__": | |
#main() | |
#hwk() | |
run() | |
print("Poop in the jungle") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment