Skip to content

Instantly share code, notes, and snippets.

@IndianBoy42
Created November 30, 2016 17:14
Show Gist options
  • Save IndianBoy42/bb1f47a6978c8d843117a62c21cd9e11 to your computer and use it in GitHub Desktop.
Save IndianBoy42/bb1f47a6978c8d843117a62c21cd9e11 to your computer and use it in GitHub Desktop.
Some random probability model stuff coded in Python+Numpy (+Matplotlib+itertools)
import numpy as np
import random
import matplotlib.pyplot as plt
import itertools
import simpsons_paradox_data as simp_para_data
def run():
print(bernoulli(0.6))
def bernoulli(p=0.5, a=1, b=0):
return {a: p, b:1-p}
def binomial(p=0.5, n=1, a=1, b=0):
bern = bernoulli(p=p, a=a, b=b)
bino = {}
for i in range(0, n+1):
c = bern[a] ** i * bern[b] ** (n-1)
d = factorial(10) / (factorial(i), factorial(n-i))
bin[n] = c / d
return bin
def factorial(n):
a = 1
for i in range(1, n+1):
a = a * i
return a
def var_range(X, m):
return list(set([X[k] for k in m]))
def var_pmf(X, m):
pmf = {}
for (k, v) in X.items():
try:
pmf[v] += m[k]
except:
pmf[v] = m[k]
return pmf
def combine2(m1, m2=None):
m2 = m2 if m2 != None else m1
return dict([((d1, d2), p1 * p2) for (d1, p1) in m1.items() for (d2, p2) in m2.items()])
def combine3(m1, m2=None, m3=None):
m2 = m2 if m2 != None else m1
m3 = m3 if m3 != None else m2
return dict([((d1, d2, d3), p1 * p2 * p3) for (d1, p1) in m1.items() for (d2, p2) in m2.items() for (d3, p3) in m3.items()])
def marginalize2(m, kc=1):
m2 = {}
for ((k1, k2), v) in m.items():
k = k1 if kc == 2 else k2
try:
m2[k] += v
except:
m2[k] = v
return m2
def marginalize3(m, kc=1):
m2 = {}
for ((k1, k2, k3), v) in m.items():
k = ((k2, k3) if kc == 1 else (k1, k3) if kc == 2 else (k1, k2))
try:
m2[k] += v
except:
m2[k] = v
return m2
def condition2(m, o, kc=1):
m1 = dict(((k2 if kc == 1 else k1), v) for ((k1, k2), v) in m.items() if (k1 if kc == 1 else k2) == o)
norm = sum(m1.values())
return dict((k, v/norm) for (k, v) in m1.items())
def condition3(m, o, kc=1):
m1 = dict((((k2, k3) if kc == 1 else (k1, k3) if kc == 2 else (k1, k2)), v) for ((k1, k2, k3), v) in m.items() if (k1 if kc == 1 else k2 if kc == 2 else k3) == o)
norm = sum(m1.values())
return dict((k, v/norm) for (k, v) in m1.items())
def condition3o(m, o1, o2, kc1=1, kc2=2):
kc = 6-kc1-kc2
m1 = dict((k1 if kc==1 else k2 if kc==2 else k3, v) for ((k1, k2, k3), v) in m.items() if (k1 if kc1 == 1 else k2 if kc1 == 2 else k3) == o1 if (k1 if kc2 == 1 else k2 if kc2 == 2 else k3) == o2)
norm = sum(m1.values())
return dict((k, v/norm) for (k, v) in m1.items())
def sum_models2(m1, m2=None):
m2 = m2 if m2 != None else m1
mc12 = combine2(m1, m2)
m12 = {}
for ((d1, d2), p) in mc12.items():
try:
m12[d1+d2] += p
except:
m12[d1+d2] = p
return m12
def condition(m, s):
ms = dict((k, v) for (k, v) in m.items() if k in s)
norm = sum(ms.values());
return dict((k, v/norm) for (k, v) in ms.items())
def condition_where(m, sf):
ms = dict((k, v) for (k, v) in m.items() if sf(k))
norm = sum(ms.values());
return dict((k, v/norm) for (k, v) in ms.items())
def sum_models3(m1, m2=None, m3=None):
m2 = m2 if m2 != None else m1
m3 = m3 if m3 != None else m2
mc12 = combine3(m1, m2, m3)
m12 = {}
for ((d1, d2, d3), p) in mc12.items():
try:
m12[d1+d2+d3] += p
except:
m12[d1+d2+d3] = p
return m12
def model_freq(m, n=1000):
return [k for ks in [[e]*int(p*n) for (e,p) in m.items()] for k in ks]
def sim_model_once(m):
o, p = zip(*list(m.items()))
return np.random.choice(o, p=p)
def sim_choice(m, M=None):
c = sim_model_once(m)
return c if M==None else M[c]
def sim_model(m, n=1):
for i in range(n):
yield sim_model_once(m)
def count_sim(sim, e, p=0):
ec = 0
cf = []
frac = []
for i in range(len(sim)):
ec += 1 if sim[i] == e else 0
cf += [ec]
frac += [ec/(i+1) - p]
return (ec, cf, frac)
def count_sim_union(sim, es, p=0, m=None):
p = p if m==None else p_event(es, m)
ec = 0
cf = []
frac = []
for i in range(len(sim)):
ec += 1 if sim[i] in es else 0
cf += [ec]
frac += [ec/(i+1) - p]
return (ec, cf, frac)
def count_sim_gen(sim, f, p=0):
ec = 0
cf = []
frac = []
for i in range(len(sim)):
ec += 1 if f(i) else 0
cf += [ec]
frac += [ec/(i+1) - p]
return (ec, cf, frac)
def count_all_sim(sim, m, normalise=False):
mc = []
mcf = []
mfrac = []
for (i, p) in m.items():
(mic, micf, mifr) = count_sim(sim, i, p = p if normalise else 0)
mc += [mic]
mcf += [micf]
mfrac += [mifr]
return (mc, mcf, mfrac)
def event_not(no, m):
return [o for o in m if o != no]
def event_n(e1, e2):
return list(set(e1) & set(e2))
def event_u(e1, e2):
return list(set(e1) | set(e2))
def event_c(e, omega=None, m=None):
return ([] if omega==None and m==None else list(set(omega if m==None else m.keys()) - set(e)))
def p_event(e, m):
return sum([p for (o, p) in m.items() if o in e])
def p_event_where(ef, m):
return sum([p for (o, p) in m.items() if ef(o)])
def event_where(ef, m):
return [(o, p) for (o, p) in m.items() if ef(o)]
def all_events(m, l=None):
if l==None:
return [combi for l in range(len(m.keys())+1) for combi in itertools.combinations(m.keys(), l)]
else:
return itertools.combinations(m.keys(), l)
def plot_fracs(fracs, figsize=None, xlabel="Number of simulations", ylabel="Frequency of event"):
plt.figure(figsize=figsize)
plt.plot(range(1, len(fracs)+1), fracs)
plt.xlabel(xlabel)
plt.ylabel(ylabel)
def plot_all_fracs(fracss, figsize=None, xlabel="Number of simulations", ylabel="Frequency of event"):
plt.figure(figsize=figsize)
plt.xlabel(xlabel)
plt.ylabel(ylabel)
for fracs in fracss:
plt.plot(range(1, len(fracs)+1), fracs)
def ndice(n=6):
return dict([(i, 1/n) for i in range(1,n+1)])
def main():
d6 = ndice()
d20 = ndice(20)
two_dice = combine2(d6, d6)
stwo_dice = sum_models2(d6, d6)
print(marginalize2(two_dice, kc=1))
print(condition2(two_dice, 2))
print(set([(d1, d2) for (d1, d2) in two_dice if d1+d2 == 7]))
print("")
weather = {
('sunny', 'hot'):3/10,
('rainy', 'hot'):1/30,
('snowy', 'hot'):0,
('sunny', 'cold'):1/5,
('rainy', 'cold'):2/15,
('snowy', 'cold'):1/3,
}
print(weather)
print(marginalize2(weather, kc=1))
print(marginalize2(weather, kc=2))
print(condition2(weather, 'cold', kc=2))
print(condition_where(d6, lambda x: x%2==0))
print(p_event_where(lambda x: x[1]==3 and x[0]%2==0, two_dice))
print(p_event_where(lambda x: x[1]==3 or x[0]%2==0, two_dice))
print(condition_where(combine(d6), lambda x: x[1]==3))
print(p_event_where(lambda x: x[0]%2==0, condition_where(combine(d6), lambda x: x[1]==3)))
def simpson():
simpsons_paradox = simp_para_data.prob_space
print(simpsons_paradox)
print(simpsons_paradox[('female', 'C', 'admitted')])
print("")
gender_bias = marginalize3(simpsons_paradox, kc=2)
print(gender_bias)
print(condition2(gender_bias, 'female', kc=1))
print(condition2(gender_bias, 'male', kc=1))
print("")
print(condition3o(simpsons_paradox, 'female', 'F', kc1=1, kc2=2))
print(condition3o(simpsons_paradox, 'male', 'F', kc1=1, kc2=2))
if __name__ == "__main__":
#main()
#hwk()
run()
print("Poop in the jungle")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment