Skip to content

Instantly share code, notes, and snippets.

@jcbozonier
jcbozonier / BoggleBoard.py
Created August 8, 2018 11:24
How I represent a Boggle board in code
# A 4x4 Boggle-style board: a list of rows, one wrapped letter per cell.
# NOTE(review): SL/DL/TL/DW/TW are constructed elsewhere and not visible
# here — presumably tile/bonus types (single letter, double/triple letter,
# double/triple word). Confirm against their definitions.
[
[SL('P'), SL('B'), DL('S'), SL('L')],
[TL('R'), SL('A'), SL('H'), SL('D')],
[SL('B'), SL('E'), SL('C'), TW('P')],
[SL('L'), DW('S'), SL('E'), SL('K')]
]
@jcbozonier
jcbozonier / foo.py
Created May 22, 2017 12:48
Generating example data
# The "true" (slope, intercept) pair invented for each
# (degree, state, county) group, used to generate example data.
_group_keys = (
    ('no degree', 'CA', 'Riverside County'),
    ('no degree', 'IL', 'Cook County'),
    ('no degree', 'IL', 'Lake County'),
    ('degree', 'CA', 'Riverside County'),
    ('degree', 'IL', 'Cook County'),
    ('degree', 'IL', 'Lake County'),
)
_true_params = (
    (5000, 1250),
    (6500, 1150),
    (7000, 1350),
    (6000, 1250),
    (7500, 1150),
    (8000, 1350),
)
set_parameters = tuple(zip(_group_keys, _true_params))
@jcbozonier
jcbozonier / foo.py
Created May 22, 2017 12:39
Full hierarchical model
# For use in Jupyter Notebook include the next line
# (IPython line magic — only valid inside a notebook/IPython session)
%matplotlib inline
import pymc3 as pm
# Flatten each hierarchy level (degree -> degree/state -> degree/state/county)
# into an integer index array plus its count, for vectorized PyMC3 indexing.
# NOTE(review): degree_index, degree_state_indexes_df and
# degree_state_county_indexes_df are DataFrames built elsewhere — the
# column names ('index', 'index_d', 'index_ds') come from that code.
degree_indexes = degree_index['index'].values
degree_count = len(degree_indexes)
degree_state_indexes = degree_state_indexes_df['index_d'].values
degree_state_count = len(degree_state_indexes)
degree_state_county_indexes = degree_state_county_indexes_df['index_ds'].values
degree_state_county_count = len(degree_state_county_indexes)
@jcbozonier
jcbozonier / foo.py
Created May 22, 2017 12:28
Creating indexes for vectorization
# Index each of the unique variable values
# NOTE(review): salary_df is defined elsewhere. The .all() aggregation is
# only a device to collapse to one row per group; the double reset_index()
# then materializes a sequential 0..N-1 'index' column beside the group keys.
degree_index = salary_df.groupby('degree').all().reset_index().reset_index()[['index', 'degree']]
degree_state_index = salary_df.groupby(['degree', 'state']).all().reset_index().reset_index()[['index', 'degree', 'state']]
degree_state_county_index = salary_df.groupby(['degree', 'state', 'county']).all().reset_index().reset_index()[['index', 'degree', 'state', 'county']]
# Join the per-level indexes so each finer level carries its parents' ids;
# suffixes distinguish the degree-level ('_d') and degree/state ('_ds') ids.
degree_state_indexes_df = pd.merge(degree_index, degree_state_index, how='inner', on='degree', suffixes=('_d', '_ds'))
degree_state_county_indexes_df = pd.merge(degree_state_indexes_df, degree_state_county_index, how='inner', on=['degree', 'state'])
# Attach all index columns back onto the raw observations.
indexed_salary_df = pd.merge(salary_df, degree_state_county_indexes_df, how='inner', on=['degree', 'state', 'county']).reset_index()
@jcbozonier
jcbozonier / foo.py
Last active May 5, 2017 12:54
Setting up our global priors
import pymc3 as pm

# Global (top-level) priors for a hierarchical linear regression: group
# slopes/intercepts are partially pooled toward these shared values.
# FIX(review): the `with` body lost its indentation in the paste, which
# made the snippet invalid Python — restored here. The model continues
# beyond this excerpt; degree_count is defined elsewhere.
with pm.Model() as model:
    # Very wide Normal priors on the shared slope (m) and intercept (b),
    # each paired with a Uniform prior on its group-level spread.
    global_m = pm.Normal('global_m', mu=0, sd=100**2)
    global_m_sd = pm.Uniform('global_m_sd', lower=0, upper=1000)
    global_b = pm.Normal('global_b', mu=0, sd=100**2)
    global_b_sd = pm.Uniform('global_b_sd', lower=0, upper=1000)

    # Per-degree slopes drawn around the global slope (one per degree level).
    degree_m = pm.Normal('degree_m', mu=global_m, sd=global_m_sd, shape=degree_count)
    degree_m_sd = pm.Uniform('degree_m_sd', lower=0, upper=1000, shape=degree_count)
@jcbozonier
jcbozonier / foo.py
Last active April 7, 2017 18:32
Radon Hierarchical Model with state as a level in addition to county. Causes error in PyMC3 of "fatal error: bracket nesting level exceeded maximum of 256"
# IPython line magic — only valid inside a Jupyter/IPython session.
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pymc3 as pm
import pandas as pd
# Load the radon dataset; 'state' and 'county' columns define the
# two-level hierarchy used by the model below this excerpt.
data = pd.read_csv('./radon.csv')
state_names = data.state.unique()
county_names = data.county.unique()
@jcbozonier
jcbozonier / py.py
Created January 30, 2017 23:28
Updating our probabilities
# Candidate mixture ratios over the three nut types: a uniform mix,
# plus one hypothesis dominated by each nut in turn.
hypotheses = [
    [1 / 3, 1 / 3, 1 / 3],
    [.8, .1, .1],
    [.1, .8, .1],
    [.1, .1, .8],
]
# Updated probabilities: score each hypothesis with the Dirichlet pdf
# (prior pseudo-counts of 1 plus the observed counts per nut type),
# then normalize the scores into a posterior over hypotheses.
_alpha = [1 + 1 + 1, 1 + 5 + 4, 1 + 2 + 4]
pdf_score = np.array([ss.dirichlet.pdf(h, _alpha) for h in hypotheses])
probabilities = pdf_score / pdf_score.sum()
list(zip(hypotheses, probabilities))
@jcbozonier
jcbozonier / py.py
Last active January 30, 2017 13:43
Moving to 3 types of nuts
# Each hypothesis gains a third element, the cashew proportion,
# alongside walnuts and almonds.
hypotheses = [
    [1 / 3, 1 / 3, 1 / 3],
    [.8, .1, .1],
    [.1, .8, .1],
    [.1, .1, .8],
]
# When the Dirichlet pdf is evaluated (below this excerpt), a third
# count is likewise included for the additional nut type we observe.
@jcbozonier
jcbozonier / py.py
Created January 30, 2017 13:39 — forked from anonymous/py.py
Evaluating Walnut and Almond Mix
# The hypothesized walnut/almond mix ratios.
hypotheses = [
    [.8, .2],
    [.5, .5],
    [.2, .8],
]
# Score each hypothesis with the Beta pdf at its walnut proportion.
# Only the first component is needed: with exactly two nut types,
# an 80% walnut mix pins the almond share at 20%.
# Shape parameters are prior pseudo-count 1 plus observed counts (1, 5).
pdf_score = np.array([ss.beta.pdf(h[0], 1 + 1, 1 + 5) for h in hypotheses])
@jcbozonier
jcbozonier / py.py
Created January 30, 2017 13:38 — forked from anonymous/py.py
Switching from Beta to Dirichlet
# The same two-nut mix hypotheses as before.
hypotheses = [
    [.8, .2],
    [.5, .5],
    [.2, .8],
]
# Notice how we swapped out the Beta for a Dirichlet. The only difference
# is we now pass a list of counts to the pdf function — prior pseudo-counts
# plus the observed counts per nut type. We'll get to why in a bit.
_counts = [1 + 1 + 2, 1 + 5 + 3]
pdf_score = np.array([ss.dirichlet.pdf(h, _counts) for h in hypotheses])
# Normalize the raw densities into a posterior over the hypotheses.
probabilities = pdf_score / pdf_score.sum()