{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Thu Jan 17 10:04:34 PST 2019\r\n"
]
}
],
"source": [
"import numpy as np, matplotlib.pyplot as plt, pandas as pd\n",
"pd.set_option('display.max_rows', 8)\n",
"!date\n",
"\n",
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pymc as pm"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Problem: how to evaluate in-sample fit of a model in PyMC? https://stackoverflow.com/questions/54169191/how-to-evaluate-a-pymc2-model-with-train-test-data"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" [-----------------100%-----------------] 10000 of 10000 complete in 4.6 secAccuracy on train data = 100.0%\n"
]
}
],
"source": [
"number_of_samples = 10000\n",
"X = np.random.randn(100, 2)\n",
"Y = np.tanh(X[:, 0] + X[:, 1])\n",
"Y = 1. / (1. + np.exp(-(Y + Y)))\n",
"Y_train = Y > 0.5\n",
"\n",
"w11 = pm.Normal('w11', mu=0., tau=1.)\n",
"w12 = pm.Normal('w12', mu=0., tau=1.)\n",
"w21 = pm.Normal('w21', mu=0., tau=1.)\n",
"w22 = pm.Normal('w22', mu=0., tau=1.)\n",
"w31 = pm.Normal('w31', mu=0., tau=1.)\n",
"w32 = pm.Normal('w32', mu=0., tau=1.)\n",
"\n",
"x1 = X[:, 0]\n",
"x2 = X[:, 1]\n",
"\n",
"x3 = pm.Lambda('x3', lambda w1=w11, w2=w12: np.tanh(w1 * x1 + w2 * x2))\n",
"x4 = pm.Lambda('x4', lambda w1=w21, w2=w22: np.tanh(w1 * x1 + w2 * x2))\n",
"\n",
"@pm.deterministic\n",
"def sigmoid(x=w31 * x3 + w32 * x4):\n",
" return 1. / (1. + np.exp(-x))\n",
"\n",
"y = pm.Bernoulli('y', sigmoid, observed=True, value=Y_train)\n",
"\n",
"model = pm.Model([w11, w12, w21, w22, w31, w32, y])\n",
"inference = pm.MCMC(model)\n",
"\n",
"inference.sample(number_of_samples)\n",
"print('Accuracy on train data = {}%'.format((y.value == Y_train).mean() * 100))\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Solution: use a \"posterior predictive check\" (https://docs.pymc.io/notebooks/posterior_predictive.html https://stats.stackexchange.com/questions/115157/what-are-posterior-predictive-checks-and-what-makes-them-useful)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" [-----------------100%-----------------] 10000 of 10000 complete in 5.7 sec"
]
}
],
"source": [
"y_pred = pm.Bernoulli('y', sigmoid)\n",
"\n",
"model = pm.Model([w11, w12, w21, w22, w31, w32, y, y_pred])\n",
"inference = pm.MCMC(model)\n",
"\n",
"inference.sample(number_of_samples)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy on train data = 100.0%\n"
]
}
],
"source": [
"y_pred_samples = y_pred.trace()\n",
"y_pred_threshold = (y_pred_samples.mean(axis=0) > .5)\n",
"print('Accuracy on train data = {}%'.format((y_pred_threshold == Y_train).mean() * 100))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0.9856, 0.9848, 0.4217, 0.9669, 0.017 , 0.0845, 0.9027, 0.0715,\n",
" 0.0148, 0.1049, 0.9801, 0.3527, 0.0198, 0.9633, 0.0203, 0.9824,\n",
" 0.0297, 0.9845, 0.9714, 0.0874, 0.0188, 0.014 , 0.9502, 0.0235,\n",
" 0.9833, 0.0392, 0.0153, 0.0138, 0.1002, 0.9865, 0.9623, 0.2509,\n",
" 0.4159, 0.6026, 0.9634, 0.9847, 0.0572, 0.9471, 0.9571, 0.9825,\n",
" 0.0152, 0.9388, 0.9852, 0.0173, 0.9792, 0.0143, 0.9842, 0.0515,\n",
" 0.9866, 0.0165, 0.0792, 0.0343, 0.044 , 0.8983, 0.9335, 0.9775,\n",
" 0.0294, 0.0766, 0.986 , 0.0221, 0.0682, 0.0611, 0.8558, 0.0816,\n",
" 0.0254, 0.9849, 0.415 , 0.0281, 0.984 , 0.7109, 0.9857, 0.1807,\n",
" 0.0454, 0.6199, 0.9682, 0.0487, 0.0142, 0.9728, 0.016 , 0.0268,\n",
" 0.0276, 0.0248, 0.0189, 0.9842, 0.9832, 0.753 , 0.9834, 0.985 ,\n",
" 0.9814, 0.5429, 0.0149, 0.9861, 0.7057, 0.9016, 0.201 , 0.9829,\n",
" 0.0226, 0.0343, 0.126 , 0.9498])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y_pred_samples.mean(axis=0) "
]
},
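{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick visual version of the same check (a minimal sketch): plot the posterior predictive mean for each observation, split by its observed label. Well-separated histograms indicate a good in-sample fit."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sketch: histograms of posterior predictive means, split by observed class\n",
"p_hat = y_pred_samples.mean(axis=0)\n",
"plt.hist(p_hat[Y_train], bins=20, alpha=.5, label='Y_train == True')\n",
"plt.hist(p_hat[~Y_train], bins=20, alpha=.5, label='Y_train == False')\n",
"plt.xlabel('posterior predictive mean of y')\n",
"plt.legend()"
]
},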
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
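,
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The same idea extends to held-out data (the setting in the stackoverflow question above): since the weight traces are stored on `inference`, new inputs can be pushed through the network by hand. A minimal sketch, assuming a hypothetical test set `X_test`/`Y_test` generated the same way as the training data:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sketch (not run): posterior predictive probabilities for held-out data,\n",
"# computed directly from the weight traces of the fitted MCMC object\n",
"X_test = np.random.randn(20, 2)  # hypothetical test inputs\n",
"Y_test = 1. / (1. + np.exp(-2. * np.tanh(X_test[:, 0] + X_test[:, 1]))) > 0.5\n",
"\n",
"w11_tr, w12_tr = inference.trace('w11')[:], inference.trace('w12')[:]\n",
"w21_tr, w22_tr = inference.trace('w21')[:], inference.trace('w22')[:]\n",
"w31_tr, w32_tr = inference.trace('w31')[:], inference.trace('w32')[:]\n",
"\n",
"# hidden units for every posterior sample (rows) and test point (columns)\n",
"h1 = np.tanh(np.outer(w11_tr, X_test[:, 0]) + np.outer(w12_tr, X_test[:, 1]))\n",
"h2 = np.tanh(np.outer(w21_tr, X_test[:, 0]) + np.outer(w22_tr, X_test[:, 1]))\n",
"p_test = (1. / (1. + np.exp(-(w31_tr[:, None] * h1 + w32_tr[:, None] * h2)))).mean(axis=0)\n",
"\n",
"print('Accuracy on test data = {}%'.format(((p_test > .5) == Y_test).mean() * 100))"
]
}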
],
"metadata": {
"kernelspec": {
"display_name": "Python (dismod_env)",
"language": "python",
"name": "dismod_env"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}