Created
January 17, 2019 18:05
-
-
Save aflaxman/2f5fc9eb8a3f521eb6346cf83592ccfe to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Thu Jan 17 10:04:34 PST 2019\r\n" | |
] | |
} | |
], | |
"source": [ | |
"import numpy as np, matplotlib.pyplot as plt, pandas as pd\n", | |
"pd.set_option('display.max_rows', 8)\n", | |
"!date\n", | |
"\n", | |
"%load_ext autoreload\n", | |
"%autoreload 2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pymc as pm" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Problem: how to evaluate in-sample fit of a model in PyMC? https://stackoverflow.com/questions/54169191/how-to-evaluate-a-pymc2-model-with-train-test-data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" [-----------------100%-----------------] 10000 of 10000 complete in 4.6 secAccuracy on train data = 100.0%\n" | |
] | |
} | |
], | |
"source": [ | |
"number_of_samples = 10000\n", | |
"X = np.random.randn(100, 2)\n", | |
"Y = np.tanh(X[:, 0] + X[:, 1])\n", | |
"Y = 1. / (1. + np.exp(-(Y + Y)))\n", | |
"Y_train = Y > 0.5\n", | |
"\n", | |
"w11 = pm.Normal('w11', mu=0., tau=1.)\n", | |
"w12 = pm.Normal('w12', mu=0., tau=1.)\n", | |
"w21 = pm.Normal('w21', mu=0., tau=1.)\n", | |
"w22 = pm.Normal('w22', mu=0., tau=1.)\n", | |
"w31 = pm.Normal('w31', mu=0., tau=1.)\n", | |
"w32 = pm.Normal('w32', mu=0., tau=1.)\n", | |
"\n", | |
"x1 = X[:, 0]\n", | |
"x2 = X[:, 1]\n", | |
"\n", | |
"x3 = pm.Lambda('x3', lambda w1=w11, w2=w12: np.tanh(w1 * x1 + w2 * x2))\n", | |
"x4 = pm.Lambda('x4', lambda w1=w21, w2=w22: np.tanh(w1 * x1 + w2 * x2))\n", | |
"\n", | |
"@pm.deterministic\n", | |
"def sigmoid(x=w31 * x3 + w32 * x4):\n", | |
" return 1. / (1. + np.exp(-x))\n", | |
"\n", | |
"y = pm.Bernoulli('y', sigmoid, observed=True, value=Y_train)\n", | |
"\n", | |
"model = pm.Model([w11, w12, w21, w22, w31, w32, y])\n", | |
"inference = pm.MCMC(model)\n", | |
"\n", | |
"inference.sample(number_of_samples)\n", | |
"print('Accuracy on train data = {}%'.format((y.value == Y_train).mean() * 100))\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Solution: use a \"posterior predictive check\" (https://docs.pymc.io/notebooks/posterior_predictive.html https://stats.stackexchange.com/questions/115157/what-are-posterior-predictive-checks-and-what-makes-them-useful)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" [-----------------100%-----------------] 10000 of 10000 complete in 5.7 sec" | |
] | |
} | |
], | |
"source": [ | |
"y_pred = pm.Bernoulli('y', sigmoid)\n", | |
"\n", | |
"model = pm.Model([w11, w12, w21, w22, w31, w32, y, y_pred])\n", | |
"inference = pm.MCMC(model)\n", | |
"\n", | |
"inference.sample(number_of_samples)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Accuracy on train data = 100.0%\n" | |
] | |
} | |
], | |
"source": [ | |
"y_pred_samples = y_pred.trace()\n", | |
"y_pred_threshold = (y_pred_samples.mean(axis=0) > .5)\n", | |
"print('Accuracy on train data = {}%'.format((y_pred_threshold == Y_train).mean() * 100))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([0.9856, 0.9848, 0.4217, 0.9669, 0.017 , 0.0845, 0.9027, 0.0715,\n", | |
" 0.0148, 0.1049, 0.9801, 0.3527, 0.0198, 0.9633, 0.0203, 0.9824,\n", | |
" 0.0297, 0.9845, 0.9714, 0.0874, 0.0188, 0.014 , 0.9502, 0.0235,\n", | |
" 0.9833, 0.0392, 0.0153, 0.0138, 0.1002, 0.9865, 0.9623, 0.2509,\n", | |
" 0.4159, 0.6026, 0.9634, 0.9847, 0.0572, 0.9471, 0.9571, 0.9825,\n", | |
" 0.0152, 0.9388, 0.9852, 0.0173, 0.9792, 0.0143, 0.9842, 0.0515,\n", | |
" 0.9866, 0.0165, 0.0792, 0.0343, 0.044 , 0.8983, 0.9335, 0.9775,\n", | |
" 0.0294, 0.0766, 0.986 , 0.0221, 0.0682, 0.0611, 0.8558, 0.0816,\n", | |
" 0.0254, 0.9849, 0.415 , 0.0281, 0.984 , 0.7109, 0.9857, 0.1807,\n", | |
" 0.0454, 0.6199, 0.9682, 0.0487, 0.0142, 0.9728, 0.016 , 0.0268,\n", | |
" 0.0276, 0.0248, 0.0189, 0.9842, 0.9832, 0.753 , 0.9834, 0.985 ,\n", | |
" 0.9814, 0.5429, 0.0149, 0.9861, 0.7057, 0.9016, 0.201 , 0.9829,\n", | |
" 0.0226, 0.0343, 0.126 , 0.9498])" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y_pred_samples.mean(axis=0) " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python (dismod_env)", | |
"language": "python", | |
"name": "dismod_env" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment