ahartikainen/stan_example.ipynb

## stan_example.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# For Ravin Kumar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pystan\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy import stats\n",
    "data = stats.norm.rvs(loc=30, scale=1, size=1000).flatten()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# prior"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "stan_code_prior = \"\"\"\n",
    "data {\n",
    "    int<lower=1> N;\n",
    "}\n",
    "parameters {\n",
    "    real mu; // Estimated parameter\n",
    "}\n",
    "\n",
    "model {\n",
    "    mu ~ normal(0, 1);\n",
    "}\n",
    "generated quantities {\n",
    "    real y_hat[N]; // prior prediction\n",
    "    for (n in 1:N) {\n",
    "        y_hat[n] = normal_rng(mu+20, 1);\n",
    "    }\n",
    "}\n",
    "\"\"\"\n",
    "stan_prior = pystan.StanModel(model_code=stan_code_prior)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING: Maximum (flat) parameter count (1000) exceeded: skipping diagnostic tests for n_eff and Rhat.\n",
      "To run all diagnostics call pystan.check_hmc_diagnostics(fit)\n"
     ]
    }
   ],
   "source": [
    "stan_data_prior = {\"N\" : len(data)}\n",
    "stan_fit_prior = stan_prior.sampling(data=stan_data_prior)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Inference for Stan model: anon_model_9e2c867eeb58fa5e5ed2bc37cd00496e.\n",
      "4 chains, each with iter=2000; warmup=1000; thin=1; \n",
      "post-warmup draws per chain=1000, total post-warmup draws=4000.\n",
      "\n",
      "       mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat\n",
      "mu    -0.02    0.03   0.99  -1.99  -0.68  -0.04   0.65    1.9   1279    1.0\n",
      "lp__  -0.49    0.02    0.7  -2.58  -0.65  -0.22  -0.05-4.4e-4   1827    1.0\n",
      "\n",
      "Samples were drawn using NUTS at Tue Apr 23 22:26:54 2019.\n",
      "For each parameter, n_eff is a crude measure of effective sample size,\n",
      "and Rhat is the potential scale reduction factor on split chains (at \n",
      "convergence, Rhat=1).\n"
     ]
    }
   ],
   "source": [
    "print(stan_fit_prior.stansummary(pars=[\"mu\", \"lp__\"]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# posterior"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "stan_code_posterior = \"\"\"\n",
    "data {\n",
    "    int N;\n",
    "    real y[N]; // Observed data\n",
    "}\n",
    "parameters {\n",
    "    real mu; // Estimated parameter\n",
    "}\n",
    "model {\n",
    "    mu ~ normal(0, 1);\n",
    "    y  ~ normal(mu+20, 1);\n",
    "}\n",
    "generated quantities {\n",
    "    real y_hat[N]; // posterior prediction\n",
    "    real log_lik[N]; // log_likelihood\n",
    "    \n",
    "    for (n in 1:N) {\n",
    "        // Stan normal functions https://mc-stan.org/docs/2_19/functions-reference/normal-distribution.html\n",
    "        y_hat[n] = normal_rng(mu, 1);\n",
    "        log_lik[n] = normal_lpdf(y[n] | mu, 1);\n",
    "    }\n",
    "}\n",
    "\"\"\"\n",
    "stan_model_posterior = pystan.StanModel(model_code=stan_code_posterior)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING: Maximum (flat) parameter count (1000) exceeded: skipping diagnostic tests for n_eff and Rhat.\n",
      "To run all diagnostics call pystan.check_hmc_diagnostics(fit)\n"
     ]
    }
   ],
   "source": [
    "stan_data_posterior = dict(\n",
    "    y=data,\n",
    "    N=len(data)\n",
    ")\n",
    "stan_fit_posterior = stan_model_posterior.sampling(data=stan_data_posterior)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Inference for Stan model: anon_model_804203b3f322a673e7cacf6457ff2d3c.\n",
      "4 chains, each with iter=2000; warmup=1000; thin=1; \n",
      "post-warmup draws per chain=1000, total post-warmup draws=4000.\n",
      "\n",
      "       mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat\n",
      "mu     9.98  9.3e-4   0.03   9.91   9.95   9.98   10.0  10.04   1179    1.0\n",
      "lp__ -526.1    0.02   0.74 -528.3 -526.3 -525.8 -525.6 -525.6   1480    1.0\n",
      "\n",
      "Samples were drawn using NUTS at Tue Apr 23 22:28:19 2019.\n",
      "For each parameter, n_eff is a crude measure of effective sample size,\n",
      "and Rhat is the potential scale reduction factor on split chains (at \n",
      "convergence, Rhat=1).\n"
     ]
    }
   ],
   "source": [
    "print(stan_fit_posterior.stansummary(pars=[\"mu\", \"lp__\"]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "import arviz as az"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "idata = az.from_pystan(posterior=stan_fit_posterior,\n",
    "                       posterior_predictive=\"y_hat\", \n",
    "                       prior=stan_fit_prior, \n",
    "                       prior_predictive=\"y_hat\",\n",
    "                       observed_data=\"y\",\n",
    "                       log_likelihood=\"log_lik\",\n",
    "                      )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Inference data with groups:\n",
       "\t> posterior\n",
       "\t> sample_stats\n",
       "\t> posterior_predictive\n",
       "\t> prior\n",
       "\t> sample_stats_prior\n",
       "\t> prior_predictive\n",
       "\t> observed_data"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "idata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>mean</th>\n",
       "      <th>sd</th>\n",
       "      <th>mcse_mean</th>\n",
       "      <th>mcse_sd</th>\n",
       "      <th>hpd_3%</th>\n",
       "      <th>hpd_97%</th>\n",
       "      <th>ess_mean</th>\n",
       "      <th>ess_sd</th>\n",
       "      <th>ess_bulk</th>\n",
       "      <th>ess_tail</th>\n",
       "      <th>r_hat</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>mu</th>\n",
       "      <td>9.976</td>\n",
       "      <td>0.032</td>\n",
       "      <td>0.001</td>\n",
       "      <td>0.001</td>\n",
       "      <td>9.919</td>\n",
       "      <td>10.037</td>\n",
       "      <td>1185.24</td>\n",
       "      <td>1184.893</td>\n",
       "      <td>1205.786</td>\n",
       "      <td>1623.983</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     mean     sd  mcse_mean  mcse_sd  hpd_3%  hpd_97%  ess_mean    ess_sd  \\\n",
       "mu  9.976  0.032      0.001    0.001   9.919   10.037   1185.24  1184.893   \n",
       "\n",
       "    ess_bulk  ess_tail  r_hat  \n",
       "mu  1205.786  1623.983    1.0  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "az.summary(idata, round_to=3)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# For Ravin Kumar"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import pystan\n",
	"import numpy as np"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"from scipy import stats\n",
	"data = stats.norm.rvs(loc=30, scale=1, size=1000).flatten()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# prior"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [],
	"source": [
	"stan_code_prior = \"\"\"\n",
	"data {\n",
	" int<lower=1> N;\n",
	"}\n",
	"parameters {\n",
	" real mu; // Estimated parameter\n",
	"}\n",
	"\n",
	"model {\n",
	" mu ~ normal(0, 1);\n",
	"}\n",
	"generated quantities {\n",
	" real y_hat[N]; // prior prediction\n",
	" for (n in 1:N) {\n",
	" y_hat[n] = normal_rng(mu+20, 1);\n",
	" }\n",
	"}\n",
	"\"\"\"\n",
	"stan_prior = pystan.StanModel(model_code=stan_code_prior)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"WARNING: Maximum (flat) parameter count (1000) exceeded: skipping diagnostic tests for n_eff and Rhat.\n",
	"To run all diagnostics call pystan.check_hmc_diagnostics(fit)\n"
	]
	}
	],
	"source": [
	"stan_data_prior = {\"N\" : len(data)}\n",
	"stan_fit_prior = stan_prior.sampling(data=stan_data_prior)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Inference for Stan model: anon_model_9e2c867eeb58fa5e5ed2bc37cd00496e.\n",
	"4 chains, each with iter=2000; warmup=1000; thin=1; \n",
	"post-warmup draws per chain=1000, total post-warmup draws=4000.\n",
	"\n",
	" mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat\n",
	"mu -0.02 0.03 0.99 -1.99 -0.68 -0.04 0.65 1.9 1279 1.0\n",
	"lp__ -0.49 0.02 0.7 -2.58 -0.65 -0.22 -0.05-4.4e-4 1827 1.0\n",
	"\n",
	"Samples were drawn using NUTS at Tue Apr 23 22:26:54 2019.\n",
	"For each parameter, n_eff is a crude measure of effective sample size,\n",
	"and Rhat is the potential scale reduction factor on split chains (at \n",
	"convergence, Rhat=1).\n"
	]
	}
	],
	"source": [
	"print(stan_fit_prior.stansummary(pars=[\"mu\", \"lp__\"]))"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# posterior"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [],
	"source": [
	"stan_code_posterior = \"\"\"\n",
	"data {\n",
	" int N;\n",
	" real y[N]; // Observed data\n",
	"}\n",
	"parameters {\n",
	" real mu; // Estimated parameter\n",
	"}\n",
	"model {\n",
	" mu ~ normal(0, 1);\n",
	" y ~ normal(mu+20, 1);\n",
	"}\n",
	"generated quantities {\n",
	" real y_hat[N]; // posterior prediction\n",
	" real log_lik[N]; // log_likelihood\n",
	" \n",
	" for (n in 1:N) {\n",
	" // Stan normal functions https://mc-stan.org/docs/2_19/functions-reference/normal-distribution.html\n",
	" y_hat[n] = normal_rng(mu, 1);\n",
	" log_lik[n] = normal_lpdf(y[n] \| mu, 1);\n",
	" }\n",
	"}\n",
	"\"\"\"\n",
	"stan_model_posterior = pystan.StanModel(model_code=stan_code_posterior)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"WARNING: Maximum (flat) parameter count (1000) exceeded: skipping diagnostic tests for n_eff and Rhat.\n",
	"To run all diagnostics call pystan.check_hmc_diagnostics(fit)\n"
	]
	}
	],
	"source": [
	"stan_data_posterior = dict(\n",
	" y=data,\n",
	" N=len(data)\n",
	")\n",
	"stan_fit_posterior = stan_model_posterior.sampling(data=stan_data_posterior)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Inference for Stan model: anon_model_804203b3f322a673e7cacf6457ff2d3c.\n",
	"4 chains, each with iter=2000; warmup=1000; thin=1; \n",
	"post-warmup draws per chain=1000, total post-warmup draws=4000.\n",
	"\n",
	" mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat\n",
	"mu 9.98 9.3e-4 0.03 9.91 9.95 9.98 10.0 10.04 1179 1.0\n",
	"lp__ -526.1 0.02 0.74 -528.3 -526.3 -525.8 -525.6 -525.6 1480 1.0\n",
	"\n",
	"Samples were drawn using NUTS at Tue Apr 23 22:28:19 2019.\n",
	"For each parameter, n_eff is a crude measure of effective sample size,\n",
	"and Rhat is the potential scale reduction factor on split chains (at \n",
	"convergence, Rhat=1).\n"
	]
	}
	],
	"source": [
	"print(stan_fit_posterior.stansummary(pars=[\"mu\", \"lp__\"]))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [],
	"source": [
	"import arviz as az"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [],
	"source": [
	"idata = az.from_pystan(posterior=stan_fit_posterior,\n",
	" posterior_predictive=\"y_hat\", \n",
	" prior=stan_fit_prior, \n",
	" prior_predictive=\"y_hat\",\n",
	" observed_data=\"y\",\n",
	" log_likelihood=\"log_lik\",\n",
	" )"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"Inference data with groups:\n",
	"\t> posterior\n",
	"\t> sample_stats\n",
	"\t> posterior_predictive\n",
	"\t> prior\n",
	"\t> sample_stats_prior\n",
	"\t> prior_predictive\n",
	"\t> observed_data"
	]
	},
	"execution_count": 11,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"idata"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>mean</th>\n",
	" <th>sd</th>\n",
	" <th>mcse_mean</th>\n",
	" <th>mcse_sd</th>\n",
	" <th>hpd_3%</th>\n",
	" <th>hpd_97%</th>\n",
	" <th>ess_mean</th>\n",
	" <th>ess_sd</th>\n",
	" <th>ess_bulk</th>\n",
	" <th>ess_tail</th>\n",
	" <th>r_hat</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>mu</th>\n",
	" <td>9.976</td>\n",
	" <td>0.032</td>\n",
	" <td>0.001</td>\n",
	" <td>0.001</td>\n",
	" <td>9.919</td>\n",
	" <td>10.037</td>\n",
	" <td>1185.24</td>\n",
	" <td>1184.893</td>\n",
	" <td>1205.786</td>\n",
	" <td>1623.983</td>\n",
	" <td>1.0</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" mean sd mcse_mean mcse_sd hpd_3% hpd_97% ess_mean ess_sd \\\n",
	"mu 9.976 0.032 0.001 0.001 9.919 10.037 1185.24 1184.893 \n",
	"\n",
	" ess_bulk ess_tail r_hat \n",
	"mu 1205.786 1623.983 1.0 "
	]
	},
	"execution_count": 12,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"az.summary(idata, round_to=3)"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.0"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}