Skip to content

Instantly share code, notes, and snippets.

@jcbozonier
jcbozonier / BoggleBoard.py
Created August 8, 2018 11:24
How I represent a Boggle board in code
# A 4x4 Boggle-style board: a list of rows, one wrapped letter per cell.
# NOTE(review): SL/DL/TL/DW/TW are constructed elsewhere and not visible
# here — presumably tile/bonus types (single letter, double/triple letter,
# double/triple word). Confirm against their definitions.
[
[SL('P'), SL('B'), DL('S'), SL('L')],
[TL('R'), SL('A'), SL('H'), SL('D')],
[SL('B'), SL('E'), SL('C'), TW('P')],
[SL('L'), DW('S'), SL('E'), SL('K')]
]
@jcbozonier
jcbozonier / foo.py
Created May 22, 2017 12:48
Generating example data
# The "true" (slope, intercept) pair invented for each
# (degree, state, county) group, used to generate example data.
_group_keys = (
    ('no degree', 'CA', 'Riverside County'),
    ('no degree', 'IL', 'Cook County'),
    ('no degree', 'IL', 'Lake County'),
    ('degree', 'CA', 'Riverside County'),
    ('degree', 'IL', 'Cook County'),
    ('degree', 'IL', 'Lake County'),
)
_true_params = (
    (5000, 1250),
    (6500, 1150),
    (7000, 1350),
    (6000, 1250),
    (7500, 1150),
    (8000, 1350),
)
set_parameters = tuple(zip(_group_keys, _true_params))
@jcbozonier
jcbozonier / foo.py
Created May 22, 2017 12:39
Full hierarchical model
# For use in Jupyter Notebook include the next line
# (IPython line magic — only valid inside a notebook/IPython session)
%matplotlib inline
import pymc3 as pm
# Flatten each hierarchy level (degree -> degree/state -> degree/state/county)
# into an integer index array plus its count, for vectorized PyMC3 indexing.
# NOTE(review): degree_index, degree_state_indexes_df and
# degree_state_county_indexes_df are DataFrames built elsewhere — the
# column names ('index', 'index_d', 'index_ds') come from that code.
degree_indexes = degree_index['index'].values
degree_count = len(degree_indexes)
degree_state_indexes = degree_state_indexes_df['index_d'].values
degree_state_count = len(degree_state_indexes)
degree_state_county_indexes = degree_state_county_indexes_df['index_ds'].values
degree_state_county_count = len(degree_state_county_indexes)
@jcbozonier
jcbozonier / foo.py
Created May 22, 2017 12:28
Creating indexes for vectorization
# Index each of the unique variable values
# NOTE(review): salary_df is defined elsewhere. The .all() aggregation is
# only a device to collapse to one row per group; the double reset_index()
# then materializes a sequential 0..N-1 'index' column beside the group keys.
degree_index = salary_df.groupby('degree').all().reset_index().reset_index()[['index', 'degree']]
degree_state_index = salary_df.groupby(['degree', 'state']).all().reset_index().reset_index()[['index', 'degree', 'state']]
degree_state_county_index = salary_df.groupby(['degree', 'state', 'county']).all().reset_index().reset_index()[['index', 'degree', 'state', 'county']]
# Join the per-level indexes so each finer level carries its parents' ids;
# suffixes distinguish the degree-level ('_d') and degree/state ('_ds') ids.
degree_state_indexes_df = pd.merge(degree_index, degree_state_index, how='inner', on='degree', suffixes=('_d', '_ds'))
degree_state_county_indexes_df = pd.merge(degree_state_indexes_df, degree_state_county_index, how='inner', on=['degree', 'state'])
# Attach all index columns back onto the raw observations.
indexed_salary_df = pd.merge(salary_df, degree_state_county_indexes_df, how='inner', on=['degree', 'state', 'county']).reset_index()
@jcbozonier
jcbozonier / foo.py
Last active May 5, 2017 12:54
Setting up our global priors
import pymc3 as pm

# Global (top-level) priors for a hierarchical linear regression: group
# slopes/intercepts are partially pooled toward these shared values.
# FIX(review): the `with` body lost its indentation in the paste, which
# made the snippet invalid Python — restored here. The model continues
# beyond this excerpt; degree_count is defined elsewhere.
with pm.Model() as model:
    # Very wide Normal priors on the shared slope (m) and intercept (b),
    # each paired with a Uniform prior on its group-level spread.
    global_m = pm.Normal('global_m', mu=0, sd=100**2)
    global_m_sd = pm.Uniform('global_m_sd', lower=0, upper=1000)
    global_b = pm.Normal('global_b', mu=0, sd=100**2)
    global_b_sd = pm.Uniform('global_b_sd', lower=0, upper=1000)

    # Per-degree slopes drawn around the global slope (one per degree level).
    degree_m = pm.Normal('degree_m', mu=global_m, sd=global_m_sd, shape=degree_count)
    degree_m_sd = pm.Uniform('degree_m_sd', lower=0, upper=1000, shape=degree_count)
@jcbozonier
jcbozonier / foo.py
Last active April 7, 2017 18:32
Radon Hierarchical Model with state as a level in addition to county. Causes error in PyMC3 of "fatal error: bracket nesting level exceeded maximum of 256"
# IPython line magic — only valid inside a Jupyter/IPython session.
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pymc3 as pm
import pandas as pd
# Load the radon dataset; 'state' and 'county' columns define the
# two-level hierarchy used by the model below this excerpt.
data = pd.read_csv('./radon.csv')
state_names = data.state.unique()
county_names = data.county.unique()
@jcbozonier
jcbozonier / py.py
Created January 30, 2017 23:28
Updating our probabilities
# Candidate mixture ratios over the three nut types: a uniform mix,
# plus one hypothesis dominated by each nut in turn.
hypotheses = [
    [1 / 3, 1 / 3, 1 / 3],
    [.8, .1, .1],
    [.1, .8, .1],
    [.1, .1, .8],
]
# Updated probabilities: score each hypothesis with the Dirichlet pdf
# (prior pseudo-counts of 1 plus the observed counts per nut type),
# then normalize the scores into a posterior over hypotheses.
_alpha = [1 + 1 + 1, 1 + 5 + 4, 1 + 2 + 4]
pdf_score = np.array([ss.dirichlet.pdf(h, _alpha) for h in hypotheses])
probabilities = pdf_score / pdf_score.sum()
list(zip(hypotheses, probabilities))
@jcbozonier
jcbozonier / py.py
Last active January 30, 2017 13:43
Moving to 3 types of nuts
# Each hypothesis gains a third element, the cashew proportion,
# alongside walnuts and almonds.
hypotheses = [
    [1 / 3, 1 / 3, 1 / 3],
    [.8, .1, .1],
    [.1, .8, .1],
    [.1, .1, .8],
]
# When the Dirichlet pdf is evaluated (below this excerpt), a third
# count is likewise included for the additional nut type we observe.
@jcbozonier
jcbozonier / py.py
Created January 30, 2017 13:39 — forked from anonymous/py.py
Evaluating Walnut and Almond Mix
# The hypothesized walnut/almond mix ratios.
hypotheses = [
    [.8, .2],
    [.5, .5],
    [.2, .8],
]
# Score each hypothesis with the Beta pdf at its walnut proportion.
# Only the first component is needed: with exactly two nut types,
# an 80% walnut mix pins the almond share at 20%.
# Shape parameters are prior pseudo-count 1 plus observed counts (1, 5).
pdf_score = np.array([ss.beta.pdf(h[0], 1 + 1, 1 + 5) for h in hypotheses])
@jcbozonier
jcbozonier / py.py
Created January 30, 2017 13:38 — forked from anonymous/py.py
Switching from Beta to Dirichlet
# The same two-nut mix hypotheses as before.
hypotheses = [
    [.8, .2],
    [.5, .5],
    [.2, .8],
]
# Notice how we swapped out the Beta for a Dirichlet. The only difference
# is we now pass a list of counts to the pdf function — prior pseudo-counts
# plus the observed counts per nut type. We'll get to why in a bit.
_counts = [1 + 1 + 2, 1 + 5 + 3]
pdf_score = np.array([ss.dirichlet.pdf(h, _counts) for h in hypotheses])
# Normalize the raw densities into a posterior over the hypotheses.
probabilities = pdf_score / pdf_score.sum()