AshNguyen/MCMC&VI 9.1 In-class Activity 2.ipynb

## MCMC&VI 9.1 In-class Activity 2.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Installing relevant packages, if you can't still import in the next cell\n",
    "#please restart the kernel\n",
    "%%bash \n",
    "\n",
    "pip install pyro-ppl\n",
    "pip install pystan\n",
    "pip install torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Importing packages\n",
    "import numpy as np\n",
    "import scipy.stats as st\n",
    "import matplotlib.pyplot as plt\n",
    "import pystan\n",
    "import torch\n",
    "import pyro\n",
    "import pyro.distributions as dist\n",
    "import pyro.contrib.autoguide as autoguide"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Generate the data, set seeds for reproducability\n",
    "pyro.set_rng_seed(25)\n",
    "\n",
    "#Number of data point\n",
    "N = 2500\n",
    "#Number of feature in the linear regression model \n",
    "#(y = bx + a + noise, b is the coefficient vector of length P)\n",
    "P = 8\n",
    "\n",
    "#Sample the true values of the parameters\n",
    "alpha_true = dist.Normal(42.0, 10.0).sample()\n",
    "beta_true = dist.Normal(torch.zeros(P), 10.0).sample()\n",
    "sigma_true = dist.Exponential(1.0).sample()\n",
    "\n",
    "#Data generation: first the noise is sampled from a normal, then the linear\n",
    "#regression y is constructed from the coefficients\n",
    "eps = dist.Normal(0.0, sigma_true).sample([N])\n",
    "x = torch.randn(N, P)\n",
    "y = alpha_true + x @ beta_true + eps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TRUE ALPHA:\n",
      "           53.780556\n",
      "TRUE BETAs:\n",
      "           1.0556217\n",
      "           15.313398\n",
      "           6.3050537\n",
      "           9.003085\n",
      "           -4.0266004\n",
      "           2.8942971\n",
      "           -6.575501\n",
      "           9.545625\n",
      "TRUE SIGMA:\n",
      "           0.04604599\n"
     ]
    }
   ],
   "source": [
    "#True parameter values\n",
    "print('TRUE ALPHA:\\n          ', alpha_true.numpy())\n",
    "print('TRUE BETAs:')\n",
    "for _ in beta_true.numpy():\n",
    "    print(\"           \"+str(_))\n",
    "print('TRUE SIGMA:\\n          ', sigma_true.numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_9668ba3a03de4e2c24a1b04fa6c99bd7 NOW.\n",
      "/Users/ash/anaconda3/lib/python3.6/site-packages/Cython/Compiler/Main.py:367: FutureWarning: Cython directive 'language_level' not set, using 2 for now (Py2). This will change in a later release! File: /var/folders/kg/zv3bhpmx19s5f8hsk7h4_wnc0000gn/T/tmpgvv4vqzo/stanfit4anon_model_9668ba3a03de4e2c24a1b04fa6c99bd7_2517093861311501633.pyx\n",
      "  tree = Parsing.p_module(s, pxd, full_module_name)\n"
     ]
    }
   ],
   "source": [
    "#MCMC: NUTS\n",
    "\n",
    "model = \"\"\"\n",
    "data {\n",
    "    int<lower = 0> N;\n",
    "    int<lower = 0> P;\n",
    "    matrix[N, P] x;\n",
    "    vector[N] y;\n",
    "}\n",
    "\n",
    "parameters {\n",
    "    real alpha;\n",
    "    vector[P] beta;\n",
    "    real<lower = 0.0> sigma;\n",
    "}\n",
    "\n",
    "model {\n",
    "    alpha ~ normal(0.0, 100.0);\n",
    "    beta ~ normal(0.0, 10.0);\n",
    "    sigma ~ normal(0.0, 10.0);\n",
    "    y ~ normal(alpha + x * beta, sigma);\n",
    "}\n",
    "\"\"\"\n",
    "stan_model = pystan.StanModel(model_code=model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = {\n",
    "    'N': N,\n",
    "    'P': P, \n",
    "    'x': x.numpy(), \n",
    "    'y': y.numpy()\n",
    "}\n",
    "\n",
    "stan_results = stan_model.sampling(data=data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING:pystan:Automatic Differentiation Variational Inference (ADVI) is an EXPERIMENTAL ALGORITHM.\n",
      "WARNING:pystan:ADVI samples may be found on the filesystem in the file `/var/folders/kg/zv3bhpmx19s5f8hsk7h4_wnc0000gn/T/tmpri7nya1s/output.csv`\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "alpha :\n",
      "53.79226664476374\n",
      "beta[1] :\n",
      "1.0487500160839303\n",
      "beta[2] :\n",
      "15.317303615194234\n",
      "beta[3] :\n",
      "6.309254941687572\n",
      "beta[4] :\n",
      "9.000856559384452\n",
      "beta[5] :\n",
      "-4.021923031328582\n",
      "beta[6] :\n",
      "2.887808448220903\n",
      "beta[7] :\n",
      "-6.567869154474554\n",
      "beta[8] :\n",
      "9.54404953144369\n",
      "sigma :\n",
      "0.04863081193857977\n"
     ]
    }
   ],
   "source": [
    "#Stan can also do ADVI\n",
    "r = stan_model.vb(data=data)\n",
    "for a, b in zip(r['mean_pars'], r['mean_par_names']):\n",
    "    print(b+\" :\\n\"+str(a))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Inference for Stan model: anon_model_9668ba3a03de4e2c24a1b04fa6c99bd7.\n",
      "4 chains, each with iter=2000; warmup=1000; thin=1; \n",
      "post-warmup draws per chain=1000, total post-warmup draws=4000.\n",
      "\n",
      "          mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat\n",
      "alpha    53.78  1.0e-5 9.4e-4  53.78  53.78  53.78  53.78  53.78   8823    1.0\n",
      "beta[1]   1.05 10.0e-6 9.2e-4   1.05   1.05   1.05   1.05   1.06   8392    1.0\n",
      "beta[2]  15.31  1.1e-5 9.2e-4  15.31  15.31  15.31  15.32  15.32   7223    1.0\n",
      "beta[3]   6.31  1.1e-5 9.1e-4    6.3    6.3   6.31   6.31   6.31   6779    1.0\n",
      "beta[4]    9.0  1.0e-5 8.9e-4    9.0    9.0    9.0    9.0    9.0   7290    1.0\n",
      "beta[5]  -4.03  1.1e-5 9.2e-4  -4.03  -4.03  -4.03  -4.03  -4.02   7472    1.0\n",
      "beta[6]   2.89  1.1e-5 9.3e-4   2.89   2.89   2.89    2.9    2.9   7387    1.0\n",
      "beta[7]  -6.57  1.0e-5 8.9e-4  -6.58  -6.57  -6.57  -6.57  -6.57   7373    1.0\n",
      "beta[8]   9.54  1.0e-5 9.2e-4   9.54   9.54   9.54   9.55   9.55   7920    1.0\n",
      "sigma     0.05  2.3e-5 6.5e-4   0.04   0.05   0.05   0.05   0.05    781   1.01\n",
      "lp__    6467.8    0.06   2.27 6462.5 6466.5 6468.2 6469.4 6471.2   1608    1.0\n",
      "\n",
      "Samples were drawn using NUTS at Fri Mar  6 17:05:34 2020.\n",
      "For each parameter, n_eff is a crude measure of effective sample size,\n",
      "and Rhat is the potential scale reduction factor on split chains (at \n",
      "convergence, Rhat=1).\n"
     ]
    }
   ],
   "source": [
    "print(stan_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/ash/anaconda3/lib/python3.6/site-packages/pyro/infer/svi.py:53: FutureWarning: The `num_samples` argument to SVI is deprecated and will be removed in a future release. Use `pyro.infer.Predictive` class to draw samples from the posterior.\n",
      "  'samples from the posterior.', FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step:     0, ELBO loss: -264330.38\n",
      "step:  5000, ELBO loss: -26694.22\n",
      "step: 10000, ELBO loss: 8124.55\n",
      "step: 15000, ELBO loss: 8136.02\n",
      "step: 20000, ELBO loss: 8169.36\n",
      "step: 25000, ELBO loss: 8150.46\n"
     ]
    }
   ],
   "source": [
    "#VI: ELBO maximization with SVI\n",
    "\n",
    "#Optimizer parameters (here we use ADAM)\n",
    "LEARNING_RATE = 1e-2\n",
    "NUM_STEPS = 30000 #Number of steps for the optimizer\n",
    "NUM_SAMPLES = 3000 #Number of sample to generate\n",
    "\n",
    "#Define the model in pyro\n",
    "def model(x, y):\n",
    "    alpha = pyro.sample(\"alpha\", dist.Normal(0.0, 100.0))\n",
    "    beta = pyro.sample(\"beta\", dist.Normal(torch.zeros(P), 10.0))\n",
    "    sigma = pyro.sample(\"sigma\", dist.HalfNormal(10.0)) #To make sigma positive\n",
    "    mu = alpha + x @ beta\n",
    "    return pyro.sample(\"y\", dist.Normal(mu, sigma), obs=y)\n",
    "\n",
    "#A guide is the variational function for the model, this can be flexibly defined\n",
    "#in pyro, but we use an automatic guide here to perform mean field Gaussian in\n",
    "#the latent space\n",
    "guide = autoguide.AutoDiagonalNormal(model)\n",
    "optimiser = pyro.optim.Adam({\"lr\": LEARNING_RATE})\n",
    "loss = pyro.infer.JitTraceGraph_ELBO() #monitor the loss\n",
    "svi = pyro.infer.SVI(model, guide, optimiser, loss, num_samples=NUM_SAMPLES)\n",
    "\n",
    "losses = np.empty(NUM_STEPS)\n",
    "\n",
    "pyro.clear_param_store()\n",
    "\n",
    "#Print out the loss to make sure we are improving\n",
    "for step in range(NUM_STEPS):\n",
    "    losses[step] = svi.step(x, y)\n",
    "    if step % 5000 == 0:\n",
    "        print(f\"step: {step:>5}, ELBO loss: {losses[step]:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/ash/anaconda3/lib/python3.6/site-packages/pyro/infer/svi.py:89: FutureWarning: The `SVI.run` method is deprecated and will be removed in a future release. For inference, use `SVI.step` directly, and for predictions, use the `pyro.infer.Predictive` class.\n",
      "  FutureWarning)\n"
     ]
    }
   ],
   "source": [
    "#Extract sampling results\n",
    "posterior = svi.run(x, y)\n",
    "support = posterior.marginal([\"alpha\", \"beta\", \"sigma\"]).support()\n",
    "\n",
    "data_dict = {k: np.expand_dims(v.detach().numpy(), 0) for k, v in support.items()}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "alpha\n",
      "Mean: 53.784035; std: 0.00084500486\n",
      "beta\n",
      "Mean: 1.0557483; std: 0.0008716847\n",
      "Mean: 15.316631; std: 0.0009405777\n",
      "Mean: 6.309164; std: 0.0009186482\n",
      "Mean: 9.00104; std: 0.0008697203\n",
      "Mean: -4.0268145; std: 0.000937752\n",
      "Mean: 2.8951876; std: 0.0010656185\n",
      "Mean: -6.5724373; std: 0.0009571902\n",
      "Mean: 9.544284; std: 0.0010541768\n",
      "sigma\n",
      "Mean: 0.046012748; std: 0.00072656485\n"
     ]
    }
   ],
   "source": [
    "for k in data_dict.keys():\n",
    "    d = data_dict[k][0]\n",
    "    print(k)\n",
    "    if k == 'beta':\n",
    "        for _ in range(P):    \n",
    "            print(\"Mean: \"+str(np.mean(d[:,_]))+\"; std: \"+str(np.std(d[:,_])))\n",
    "    else: \n",
    "        print(\"Mean: \"+str(np.mean(d))+\"; std: \"+str(np.std(d)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Task\n",
    "\n",
    "1. Vary the NUM_SAMPLES, NUM_STEPS, LEARNING_RATE to see the effect of these parameters on the loss and results\n",
    "2. Plot the distributions of the sampled results, versus the true parameters\n",
    "3. Compare MCMC and VI in their sampling results, on both time and accuracy"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [],
	"source": [
	"#Installing relevant packages, if you can't still import in the next cell\n",
	"#please restart the kernel\n",
	"%%bash \n",
	"\n",
	"pip install pyro-ppl\n",
	"pip install pystan\n",
	"pip install torch"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"#Importing packages\n",
	"import numpy as np\n",
	"import scipy.stats as st\n",
	"import matplotlib.pyplot as plt\n",
	"import pystan\n",
	"import torch\n",
	"import pyro\n",
	"import pyro.distributions as dist\n",
	"import pyro.contrib.autoguide as autoguide"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [],
	"source": [
	"#Generate the data, set seeds for reproducability\n",
	"pyro.set_rng_seed(25)\n",
	"\n",
	"#Number of data point\n",
	"N = 2500\n",
	"#Number of feature in the linear regression model \n",
	"#(y = bx + a + noise, b is the coefficient vector of length P)\n",
	"P = 8\n",
	"\n",
	"#Sample the true values of the parameters\n",
	"alpha_true = dist.Normal(42.0, 10.0).sample()\n",
	"beta_true = dist.Normal(torch.zeros(P), 10.0).sample()\n",
	"sigma_true = dist.Exponential(1.0).sample()\n",
	"\n",
	"#Data generation: first the noise is sampled from a normal, then the linear\n",
	"#regression y is constructed from the coefficients\n",
	"eps = dist.Normal(0.0, sigma_true).sample([N])\n",
	"x = torch.randn(N, P)\n",
	"y = alpha_true + x @ beta_true + eps"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"TRUE ALPHA:\n",
	" 53.780556\n",
	"TRUE BETAs:\n",
	" 1.0556217\n",
	" 15.313398\n",
	" 6.3050537\n",
	" 9.003085\n",
	" -4.0266004\n",
	" 2.8942971\n",
	" -6.575501\n",
	" 9.545625\n",
	"TRUE SIGMA:\n",
	" 0.04604599\n"
	]
	}
	],
	"source": [
	"#True parameter values\n",
	"print('TRUE ALPHA:\\n ', alpha_true.numpy())\n",
	"print('TRUE BETAs:')\n",
	"for _ in beta_true.numpy():\n",
	" print(\" \"+str(_))\n",
	"print('TRUE SIGMA:\\n ', sigma_true.numpy())"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_9668ba3a03de4e2c24a1b04fa6c99bd7 NOW.\n",
	"/Users/ash/anaconda3/lib/python3.6/site-packages/Cython/Compiler/Main.py:367: FutureWarning: Cython directive 'language_level' not set, using 2 for now (Py2). This will change in a later release! File: /var/folders/kg/zv3bhpmx19s5f8hsk7h4_wnc0000gn/T/tmpgvv4vqzo/stanfit4anon_model_9668ba3a03de4e2c24a1b04fa6c99bd7_2517093861311501633.pyx\n",
	" tree = Parsing.p_module(s, pxd, full_module_name)\n"
	]
	}
	],
	"source": [
	"#MCMC: NUTS\n",
	"\n",
	"model = \"\"\"\n",
	"data {\n",
	" int<lower = 0> N;\n",
	" int<lower = 0> P;\n",
	" matrix[N, P] x;\n",
	" vector[N] y;\n",
	"}\n",
	"\n",
	"parameters {\n",
	" real alpha;\n",
	" vector[P] beta;\n",
	" real<lower = 0.0> sigma;\n",
	"}\n",
	"\n",
	"model {\n",
	" alpha ~ normal(0.0, 100.0);\n",
	" beta ~ normal(0.0, 10.0);\n",
	" sigma ~ normal(0.0, 10.0);\n",
	" y ~ normal(alpha + x * beta, sigma);\n",
	"}\n",
	"\"\"\"\n",
	"stan_model = pystan.StanModel(model_code=model)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [],
	"source": [
	"data = {\n",
	" 'N': N,\n",
	" 'P': P, \n",
	" 'x': x.numpy(), \n",
	" 'y': y.numpy()\n",
	"}\n",
	"\n",
	"stan_results = stan_model.sampling(data=data)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"WARNING:pystan:Automatic Differentiation Variational Inference (ADVI) is an EXPERIMENTAL ALGORITHM.\n",
	"WARNING:pystan:ADVI samples may be found on the filesystem in the file `/var/folders/kg/zv3bhpmx19s5f8hsk7h4_wnc0000gn/T/tmpri7nya1s/output.csv`\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"alpha :\n",
	"53.79226664476374\n",
	"beta[1] :\n",
	"1.0487500160839303\n",
	"beta[2] :\n",
	"15.317303615194234\n",
	"beta[3] :\n",
	"6.309254941687572\n",
	"beta[4] :\n",
	"9.000856559384452\n",
	"beta[5] :\n",
	"-4.021923031328582\n",
	"beta[6] :\n",
	"2.887808448220903\n",
	"beta[7] :\n",
	"-6.567869154474554\n",
	"beta[8] :\n",
	"9.54404953144369\n",
	"sigma :\n",
	"0.04863081193857977\n"
	]
	}
	],
	"source": [
	"#Stan can also do ADVI\n",
	"r = stan_model.vb(data=data)\n",
	"for a, b in zip(r['mean_pars'], r['mean_par_names']):\n",
	" print(b+\" :\\n\"+str(a))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Inference for Stan model: anon_model_9668ba3a03de4e2c24a1b04fa6c99bd7.\n",
	"4 chains, each with iter=2000; warmup=1000; thin=1; \n",
	"post-warmup draws per chain=1000, total post-warmup draws=4000.\n",
	"\n",
	" mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat\n",
	"alpha 53.78 1.0e-5 9.4e-4 53.78 53.78 53.78 53.78 53.78 8823 1.0\n",
	"beta[1] 1.05 10.0e-6 9.2e-4 1.05 1.05 1.05 1.05 1.06 8392 1.0\n",
	"beta[2] 15.31 1.1e-5 9.2e-4 15.31 15.31 15.31 15.32 15.32 7223 1.0\n",
	"beta[3] 6.31 1.1e-5 9.1e-4 6.3 6.3 6.31 6.31 6.31 6779 1.0\n",
	"beta[4] 9.0 1.0e-5 8.9e-4 9.0 9.0 9.0 9.0 9.0 7290 1.0\n",
	"beta[5] -4.03 1.1e-5 9.2e-4 -4.03 -4.03 -4.03 -4.03 -4.02 7472 1.0\n",
	"beta[6] 2.89 1.1e-5 9.3e-4 2.89 2.89 2.89 2.9 2.9 7387 1.0\n",
	"beta[7] -6.57 1.0e-5 8.9e-4 -6.58 -6.57 -6.57 -6.57 -6.57 7373 1.0\n",
	"beta[8] 9.54 1.0e-5 9.2e-4 9.54 9.54 9.54 9.55 9.55 7920 1.0\n",
	"sigma 0.05 2.3e-5 6.5e-4 0.04 0.05 0.05 0.05 0.05 781 1.01\n",
	"lp__ 6467.8 0.06 2.27 6462.5 6466.5 6468.2 6469.4 6471.2 1608 1.0\n",
	"\n",
	"Samples were drawn using NUTS at Fri Mar 6 17:05:34 2020.\n",
	"For each parameter, n_eff is a crude measure of effective sample size,\n",
	"and Rhat is the potential scale reduction factor on split chains (at \n",
	"convergence, Rhat=1).\n"
	]
	}
	],
	"source": [
	"print(stan_results)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"/Users/ash/anaconda3/lib/python3.6/site-packages/pyro/infer/svi.py:53: FutureWarning: The `num_samples` argument to SVI is deprecated and will be removed in a future release. Use `pyro.infer.Predictive` class to draw samples from the posterior.\n",
	" 'samples from the posterior.', FutureWarning)\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"step: 0, ELBO loss: -264330.38\n",
	"step: 5000, ELBO loss: -26694.22\n",
	"step: 10000, ELBO loss: 8124.55\n",
	"step: 15000, ELBO loss: 8136.02\n",
	"step: 20000, ELBO loss: 8169.36\n",
	"step: 25000, ELBO loss: 8150.46\n"
	]
	}
	],
	"source": [
	"#VI: ELBO maximization with SVI\n",
	"\n",
	"#Optimizer parameters (here we use ADAM)\n",
	"LEARNING_RATE = 1e-2\n",
	"NUM_STEPS = 30000 #Number of steps for the optimizer\n",
	"NUM_SAMPLES = 3000 #Number of sample to generate\n",
	"\n",
	"#Define the model in pyro\n",
	"def model(x, y):\n",
	" alpha = pyro.sample(\"alpha\", dist.Normal(0.0, 100.0))\n",
	" beta = pyro.sample(\"beta\", dist.Normal(torch.zeros(P), 10.0))\n",
	" sigma = pyro.sample(\"sigma\", dist.HalfNormal(10.0)) #To make sigma positive\n",
	" mu = alpha + x @ beta\n",
	" return pyro.sample(\"y\", dist.Normal(mu, sigma), obs=y)\n",
	"\n",
	"#A guide is the variational function for the model, this can be flexibly defined\n",
	"#in pyro, but we use an automatic guide here to perform mean field Gaussian in\n",
	"#the latent space\n",
	"guide = autoguide.AutoDiagonalNormal(model)\n",
	"optimiser = pyro.optim.Adam({\"lr\": LEARNING_RATE})\n",
	"loss = pyro.infer.JitTraceGraph_ELBO() #monitor the loss\n",
	"svi = pyro.infer.SVI(model, guide, optimiser, loss, num_samples=NUM_SAMPLES)\n",
	"\n",
	"losses = np.empty(NUM_STEPS)\n",
	"\n",
	"pyro.clear_param_store()\n",
	"\n",
	"#Print out the loss to make sure we are improving\n",
	"for step in range(NUM_STEPS):\n",
	" losses[step] = svi.step(x, y)\n",
	" if step % 5000 == 0:\n",
	" print(f\"step: {step:>5}, ELBO loss: {losses[step]:.2f}\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"/Users/ash/anaconda3/lib/python3.6/site-packages/pyro/infer/svi.py:89: FutureWarning: The `SVI.run` method is deprecated and will be removed in a future release. For inference, use `SVI.step` directly, and for predictions, use the `pyro.infer.Predictive` class.\n",
	" FutureWarning)\n"
	]
	}
	],
	"source": [
	"#Extract sampling results\n",
	"posterior = svi.run(x, y)\n",
	"support = posterior.marginal([\"alpha\", \"beta\", \"sigma\"]).support()\n",
	"\n",
	"data_dict = {k: np.expand_dims(v.detach().numpy(), 0) for k, v in support.items()}"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"alpha\n",
	"Mean: 53.784035; std: 0.00084500486\n",
	"beta\n",
	"Mean: 1.0557483; std: 0.0008716847\n",
	"Mean: 15.316631; std: 0.0009405777\n",
	"Mean: 6.309164; std: 0.0009186482\n",
	"Mean: 9.00104; std: 0.0008697203\n",
	"Mean: -4.0268145; std: 0.000937752\n",
	"Mean: 2.8951876; std: 0.0010656185\n",
	"Mean: -6.5724373; std: 0.0009571902\n",
	"Mean: 9.544284; std: 0.0010541768\n",
	"sigma\n",
	"Mean: 0.046012748; std: 0.00072656485\n"
	]
	}
	],
	"source": [
	"for k in data_dict.keys():\n",
	" d = data_dict[k][0]\n",
	" print(k)\n",
	" if k == 'beta':\n",
	" for _ in range(P): \n",
	" print(\"Mean: \"+str(np.mean(d[:,_]))+\"; std: \"+str(np.std(d[:,_])))\n",
	" else: \n",
	" print(\"Mean: \"+str(np.mean(d))+\"; std: \"+str(np.std(d)))"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Task\n",
	"\n",
	"1. Vary the NUM_SAMPLES, NUM_STEPS, LEARNING_RATE to see the effect of these parameters on the loss and results\n",
	"2. Plot the distributions of the sampled results, versus the true parameters\n",
	"3. Compare MCMC and VI in their sampling results, on both time and accuracy"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.8"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}