dfm/finished.ipynb

## finished.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "%config InlineBackend.figure_format = \"retina\"\n",
    "from matplotlib import rcParams\n",
    "rcParams[\"savefig.dpi\"] = 100"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import emcee                         # http://dfm.io/emcee\n",
    "import corner                        # https://github.com/dfm/corner.py\n",
    "\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as pl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "print(\"emcee:\", emcee.__version__)\n",
    "print(\"corner:\", corner.__version__)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Generate some fake data:**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def mean_model(theta, x):\n",
    "    return theta[0] * x + theta[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "np.random.seed(1234)  # live coding\n",
    "\n",
    "theta_true = np.array([1.234, -0.01, 2*np.log(0.9)])  # m, b, ln(s^2)\n",
    "\n",
    "x = np.sort(np.random.uniform(-5, 5, 20))\n",
    "yerr = np.random.uniform(0.3, 0.5, len(x))\n",
    "y = mean_model(theta_true, x)\n",
    "y += np.sqrt(yerr**2 + np.exp(theta_true[2])) * np.random.randn(len(y))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Plot the data and the truth:**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "x0 = np.linspace(-5, 5, 500)\n",
    "\n",
    "pl.errorbar(x, y, yerr=yerr, fmt=\".k\", capsize=0)\n",
    "pl.plot(x0, mean_model(theta_true, x0), \"g\", lw=1.25)\n",
    "pl.xlabel(\"x\")\n",
    "pl.ylabel(\"y\")\n",
    "pl.xlim(-5.1, 5.1)\n",
    "pl.ylim(-5.8, 5.8);"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Compute the maximum likelihood line:**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "A = np.vander(x, 2)\n",
    "print(A[:3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "ATA = np.dot(A.T, A / yerr[:, None]**2)\n",
    "theta_ml = np.linalg.solve(ATA, np.dot(A.T, y / yerr**2))\n",
    "theta_ml_cov = np.linalg.inv(ATA)\n",
    "\n",
    "theta_ml_samples = np.random.multivariate_normal(theta_ml, theta_ml_cov, 50)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pl.errorbar(x, y, yerr=yerr, fmt=\".k\", capsize=0)\n",
    "pl.plot(x0, mean_model(theta_true, x0), \"g\", lw=1.25)\n",
    "for t in theta_ml_samples:\n",
    "    pl.plot(x0, mean_model(t, x0), \"r\", alpha=0.1)\n",
    "pl.xlabel(\"x\")\n",
    "pl.ylabel(\"y\")\n",
    "pl.xlim(-5.1, 5.1)\n",
    "pl.ylim(-5.8, 5.8);"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**The probabilistic model:**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def lnlike(theta):\n",
    "    sig2 = yerr**2 + np.exp(theta[2])\n",
    "    mean = mean_model(theta, x)\n",
    "    return -0.5*np.sum((y - mean)**2 / sig2 + np.log(sig2))\n",
    "\n",
    "def lnprior(theta):\n",
    "    if np.any(theta < -10) or np.any(theta > 10):\n",
    "        return -np.inf\n",
    "    m = theta[0]\n",
    "    return -1.5 * np.log(1 + m**2)\n",
    "\n",
    "def lnprob(theta):\n",
    "    lp = lnprior(theta)\n",
    "    if not np.isfinite(lp):\n",
    "        return -np.inf\n",
    "    return lnlike(theta) + lp"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Run MCMC (burn in):**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ndim, nwalkers = 3, 36\n",
    "initial_coords = 0.1*np.random.randn(nwalkers, ndim)\n",
    "\n",
    "sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob)\n",
    "coords, _, _ = sampler.run_mcmc(initial_coords, 500)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pl.plot(sampler.chain[:, :, 2].T, alpha=0.5);"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Run MCMC (production):**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "sampler.reset()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "coords, _, _ = sampler.run_mcmc(coords, 500)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pl.plot(sampler.chain[:, :, 2].T, alpha=0.5);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "sampler.get_autocorr_time()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Plot results:**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "samples = sampler.flatchain\n",
    "corner.corner(samples, truths=theta_true, quantiles=[0.16, 0.84], labels=[\"$m$\", \"$b$\", \"$\\ln s^2$\"]);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pl.errorbar(x, y, yerr=yerr, fmt=\".k\", capsize=0)\n",
    "pl.plot(x0, mean_model(theta_true, x0), \"g\", lw=1.25)\n",
    "for t in samples[np.random.randint(len(samples), size=50)]:\n",
    "    pl.plot(x0, mean_model(t, x0), \"b\", alpha=0.1)\n",
    "pl.xlabel(\"x\")\n",
    "pl.ylabel(\"y\")\n",
    "pl.xlim(-5.1, 5.1)\n",
    "pl.ylim(-5.8, 5.8);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"%matplotlib inline\n",
	"%config InlineBackend.figure_format = \"retina\"\n",
	"from matplotlib import rcParams\n",
	"rcParams[\"savefig.dpi\"] = 100"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import emcee # http://dfm.io/emcee\n",
	"import corner # https://github.com/dfm/corner.py\n",
	"\n",
	"import numpy as np\n",
	"import matplotlib.pyplot as pl"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"print(\"emcee:\", emcee.__version__)\n",
	"print(\"corner:\", corner.__version__)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Generate some fake data:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def mean_model(theta, x):\n",
	" return theta[0] * x + theta[1]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"np.random.seed(1234) # live coding\n",
	"\n",
	"theta_true = np.array([1.234, -0.01, 2*np.log(0.9)]) # m, b, ln(s^2)\n",
	"\n",
	"x = np.sort(np.random.uniform(-5, 5, 20))\n",
	"yerr = np.random.uniform(0.3, 0.5, len(x))\n",
	"y = mean_model(theta_true, x)\n",
	"y += np.sqrt(yerr*2 + np.exp(theta_true[2])) np.random.randn(len(y))"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Plot the data and the truth:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"x0 = np.linspace(-5, 5, 500)\n",
	"\n",
	"pl.errorbar(x, y, yerr=yerr, fmt=\".k\", capsize=0)\n",
	"pl.plot(x0, mean_model(theta_true, x0), \"g\", lw=1.25)\n",
	"pl.xlabel(\"x\")\n",
	"pl.ylabel(\"y\")\n",
	"pl.xlim(-5.1, 5.1)\n",
	"pl.ylim(-5.8, 5.8);"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Compute the maximum likelihood line:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"A = np.vander(x, 2)\n",
	"print(A[:3])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"ATA = np.dot(A.T, A / yerr[:, None]**2)\n",
	"theta_ml = np.linalg.solve(ATA, np.dot(A.T, y / yerr**2))\n",
	"theta_ml_cov = np.linalg.inv(ATA)\n",
	"\n",
	"theta_ml_samples = np.random.multivariate_normal(theta_ml, theta_ml_cov, 50)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"pl.errorbar(x, y, yerr=yerr, fmt=\".k\", capsize=0)\n",
	"pl.plot(x0, mean_model(theta_true, x0), \"g\", lw=1.25)\n",
	"for t in theta_ml_samples:\n",
	" pl.plot(x0, mean_model(t, x0), \"r\", alpha=0.1)\n",
	"pl.xlabel(\"x\")\n",
	"pl.ylabel(\"y\")\n",
	"pl.xlim(-5.1, 5.1)\n",
	"pl.ylim(-5.8, 5.8);"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"The probabilistic model:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def lnlike(theta):\n",
	" sig2 = yerr**2 + np.exp(theta[2])\n",
	" mean = mean_model(theta, x)\n",
	" return -0.5np.sum((y - mean)*2 / sig2 + np.log(sig2))\n",
	"\n",
	"def lnprior(theta):\n",
	" if np.any(theta < -10) or np.any(theta > 10):\n",
	" return -np.inf\n",
	" m = theta[0]\n",
	" return -1.5 * np.log(1 + m**2)\n",
	"\n",
	"def lnprob(theta):\n",
	" lp = lnprior(theta)\n",
	" if not np.isfinite(lp):\n",
	" return -np.inf\n",
	" return lnlike(theta) + lp"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Run MCMC (burn in):"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"ndim, nwalkers = 3, 36\n",
	"initial_coords = 0.1*np.random.randn(nwalkers, ndim)\n",
	"\n",
	"sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob)\n",
	"coords, _, _ = sampler.run_mcmc(initial_coords, 500)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"pl.plot(sampler.chain[:, :, 2].T, alpha=0.5);"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Run MCMC (production):"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"sampler.reset()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"coords, _, _ = sampler.run_mcmc(coords, 500)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"pl.plot(sampler.chain[:, :, 2].T, alpha=0.5);"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"sampler.get_autocorr_time()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Plot results:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"samples = sampler.flatchain\n",
	"corner.corner(samples, truths=theta_true, quantiles=[0.16, 0.84], labels=[\"$m$\", \"$b$\", \"$\\ln s^2$\"]);"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"pl.errorbar(x, y, yerr=yerr, fmt=\".k\", capsize=0)\n",
	"pl.plot(x0, mean_model(theta_true, x0), \"g\", lw=1.25)\n",
	"for t in samples[np.random.randint(len(samples), size=50)]:\n",
	" pl.plot(x0, mean_model(t, x0), \"b\", alpha=0.1)\n",
	"pl.xlabel(\"x\")\n",
	"pl.ylabel(\"y\")\n",
	"pl.xlim(-5.1, 5.1)\n",
	"pl.ylim(-5.8, 5.8);"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.4.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 0
	}