Skip to content

Instantly share code, notes, and snippets.

@AustinRochford
Last active September 18, 2016 21:57
Show Gist options
  • Save AustinRochford/192e13394e4b76d6d1648b4ee81f7bc7 to your computer and use it in GitHub Desktop.
Save AustinRochford/192e13394e4b76d6d1648b4ee81f7bc7 to your computer and use it in GitHub Desktop.
PyMC3 ADVI Latend Dirichlet Allocation
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import lda\n",
"from matplotlib import pyplot as plt\n",
"import numpy as np\n",
"import pymc3 as pm\n",
"from pymc3.distributions.dist_math import bound, factln\n",
"import seaborn as sns\n",
"from theano import tensor as tt"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"N_TOPICS = 5\n",
"\n",
"N_DOCUMENTS = 99\n",
"N_WORDS = 100"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"data = lda.datasets.load_reuters()[:N_DOCUMENTS, :N_WORDS]\n",
"word_counts = data.sum(axis=1)\n",
"n_documents = data.shape[0]\n",
"\n",
"vocab = np.array(lda.datasets.load_reuters_vocab())\n",
"titles = lda.datasets.load_reuters_titles()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If $\\mathbf{y}_d$ is the vector of word counts for the $d$-th document, LDA is the model\n",
"\n",
"$$\n",
"\\begin{align*}\n",
"\\boldsymbol{\\pi}_d\\ |\\ \\boldsymbol{\\alpha}\n",
" & \\sim \\textrm{Dir}(\\boldsymbol{\\alpha}) \\\\\n",
"z_d\\ |\\ \\boldsymbol{\\pi}_d\n",
" & \\sim \\textrm{Cat}(\\boldsymbol{\\pi}_d) \\\\\n",
"\\mathbf{w}_t\\ |\\ \\boldsymbol{\\beta}\n",
" & \\sim \\textrm{Dir}(\\boldsymbol{\\beta}) \\\\\n",
"\\mathbf{y}_d\\ | z_d, \\mathbf{W}\n",
" & \\sim \\textrm{Multinomial}(\\mathbf{w}_{z_d})\n",
"\\end{align*}\n",
"$$\n",
"\n",
"The many discrete latent variables $z_d$ can make MCMC inference take a long time to converge, due to poor mixing. To avoid this, we marginalize over $\\mathbf{z}$, which gives $\\mathbf{y}_d\\ |\\ \\boldsymbol{\\pi}_d, \\mathbf{W} \\sim \\textrm{Multinomial}(\\boldsymbol{\\pi}_d \\mathbf{W})$ (here we think of $\\boldsymbol{\\pi}_d$ as a one by the number of topics row vector). In order to fit this model efficiently in `pymc3`, we need to following class that generalizes `pymc3.Multinomial` to allow a vector of observations, each with different `n` and `p`."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class ManyMultinomials(pm.Discrete):\n",
" def __init__(self, n, p, *args, **kwargs):\n",
" super(ManyMultinomials, self).__init__(*args, **kwargs)\n",
" \n",
" self.n = n\n",
" self.p = p\n",
" \n",
" def logp(self, x):\n",
" n = self.n\n",
" p = self.p\n",
" \n",
" return bound(\n",
" factln(n).sum() + tt.sum(x * tt.log(p) - factln(x)),\n",
" tt.all(x >= 0), tt.all(x <= n[:, np.newaxis]), tt.all(tt.eq(tt.sum(x, axis=1), n)),\n",
" n >= 0\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Applied stickbreaking-transform to topic_word and added transformed topic_word_stickbreaking_ to model.\n",
"Applied stickbreaking-transform to doc_topic and added transformed doc_topic_stickbreaking_ to model.\n"
]
}
],
"source": [
"ALPHA = 1.\n",
"BETA = 1.\n",
"\n",
"with pm.Model() as model:\n",
" topic_word = pm.Dirichlet('topic_word', ALPHA * np.ones(N_WORDS),\n",
" shape=(N_TOPICS, N_WORDS))\n",
" doc_topic = pm.Dirichlet('doc_topic', BETA * np.ones(N_TOPICS),\n",
" shape=(N_DOCUMENTS, N_TOPICS))\n",
" doc_word = pm.Deterministic('doc_word', doc_topic.dot(topic_word))\n",
" \n",
" words = ManyMultinomials('words', word_counts, doc_word, observed=data)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Variational inference should scale better than MCMC here."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Iteration 0 [0%]: ELBO = -1062761.42\n",
"Iteration 5000 [10%]: Average ELBO = -973099.59\n",
"Iteration 10000 [20%]: Average ELBO = -913868.33\n",
"Iteration 15000 [30%]: Average ELBO = -794061.01\n",
"Iteration 20000 [40%]: Average ELBO = -664975.6\n",
"Iteration 25000 [50%]: Average ELBO = -630011.2\n",
"Iteration 30000 [60%]: Average ELBO = -622648.33\n",
"Iteration 35000 [70%]: Average ELBO = -621004.34\n",
"Iteration 40000 [80%]: Average ELBO = -620320.82\n",
"Iteration 45000 [90%]: Average ELBO = -619681.61\n",
"Finished [100%]: Average ELBO = -619300.12\n",
"CPU times: user 55.7 s, sys: 500 ms, total: 56.2 s\n",
"Wall time: 1min 3s\n"
]
}
],
"source": [
"%%time\n",
"with model:\n",
" advi_fit = pm.advi(n=50000)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAiYAAAGCCAYAAADUqwgjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3XlAlNXeB/DvzDAgsikyMCyKCpqYQosmhhcNkHFDIcG6\n3VsJtq+K2uLNS6VmpdeuN1s0zd56vVYuWG8oGpCouZSVoaSlFuECww6DINuc9w9yEmVm2GaD7+cv\nOM95nufHsYYv51mORAghQERERGQFpJYugIiIiOgKBhMiIiKyGgwmREREZDUYTIiIiMhqMJgQERGR\n1WAwISIiIqthtcHko48+wqRJkxATE4OVK1fq2teuXYvo6GhMnjwZBw4c0LXv27cPkyZNgkqlwrp1\n63Tt58+fx6xZs6BSqZCcnIzGxkYAQH19PebNm4fo6GjcdddduHjxotFzEBERkYkJK3T48GGRmJgo\nGhoahBBClJaWCiGEOHPmjJgxY4ZoaGgQ586dE1FRUUKr1YqmpiYRFRUlzp8/L+rr68X06dPFmTNn\nhBBCPP3002Lnzp1CCCH++c9/is2bNwshhNi0aZNISUkRQgiRlpYm5s6dK4QQ4vTp062eg4iIiEzP\nKmdMNm/ejAcffBB2dnYAAHd3dwBAZmYmpkyZAjs7O/j5+cHf3x85OTnIycmBv78/fH19IZfLMXXq\nVGRmZgIADh8+DJVKBQCIi4tDRkaG7lhxcXEAAJVKhcOHDwMAsrKyWj0HERERmZ5VBpO8vDwcPXoU\ns2bNwr333osTJ04AANRqNby9vXX9vLy8oFarW20vKipCeXk53NzcIJU2/5hKpRJqtRoAUFRUBKVS\nCQCQyWRwcXFBRUWF3nMQERGR6dlZ6sSJiYkoKSm5rn3u3LloampCVVUVPv30U+Tk5ODpp59GZmYm\nRCtvz5dIJNBqta2eQwhx3T4SiUS3rbVj6WsnIiIi07NYMNm4caPebR9//DGio6MBAMHBwZDJZCgv\nL4dSqURBQYGuX2FhITw9PSGEaHHzqlqthqenJ9zd3VFVVQWtVgupVKrrDzTPhBQWFsLLywtNTU3Q\naDRwc3PTew5jhBAMMERERJ1ksWBiSFRUFA4dOoTRo0fjt99+Q0NDA/r27YuIiAgsWLAAs2fPhlqt\nRn5+PoKDg6HVapGfn48LFy5AoVAgLS0Nq1atAgCEhoYiPT0dU6ZMQWpqKiIjIwEAERERSE1NRUhI\nCNLT0xEaGqprb+0cxkgkEhQXa0w3KASFwoVjbGIcY9PjGJsHx9n0FAoXkxzXKoPJnXfeiUWLFiEm\nJgZyuRyvvfYaACAwMBCTJ0/G1KlTYWdnh5SUFEgkEshkMixevBhJSUkQQiA+Ph4BAQEAgPnz5yM5\nORmrV69GUFAQ4uPjAQAJCQlYuHAhoqOj0adPH12Q0XcOIiIiMj2JaO2mCuoQpnPT4l9ApscxNj2O\nsXlwnE3PVDMmVvlUDhEREfVMDCZERERkNRhMiIiIyGowmBAREZHVYDAhIiIiq2GVjwsTERFZA60Q\nkOp5Y7gQQGOTFhKJBBLJny/aFEKgsUlAbidF1aV61NQ1wt5Oitq6Jjj2skPt5UbdcQUEyjV1sJfL\nUFByCRKJBH2cHVCmuYyyqsto0go495KjoUmLiuo6NGkFKqvr4e7iAHV5LWrqGqEuq0GTVsCltxyX\n65vg7d4bAkBFdR00NQ2wl0tR39D6G9I74//+NaPLjwkwmBARWVTz0hnNvwAbGrW6X3bFFbVwcbJH\n1aV6VFbXo7TqMuobmtDXxQE/5ZXj9PkKlFRehr1cCl8PJ/xWwEdjezpNTQMAIL+oukW7KUKJKTGY\nEFGPIoRAXUMTyqrqUFp1GV8fL0DN5Uac+K3M0qV1SH2DlqGEuhUGEyKyKUIIaGobkPXdeXz+dZ6l\nyyErcetQBeRyKXJ/K9PNHOgzc/xgBAd4QG4nRWnlZfRz64XT5yowtH8f2Mtl6OviACEELl1uRGFZ\nDTzceiF5zdctjuGncMa02/3h1bc3Vn78A5wc5Sgqr9Vt9+7XG3eGB8BeLkVfZwf8N+MX3DNxKLRa\ngRc3fovwEB9E3OILHw8nXK5vglQigb1cirqGJvxeqMHwge4AAHVZDdxdHWAnk6JJKyCVSvDAa18B\nAP71eBhkUglcnewBAA2NTfj3lhyMG+mNsSOUyFdr4CCXwcu9NxqbtLCTNd9WWl3bgO3ZZzE51B+K\nPo66mq/M3kmlln3bOd/82oX4lkHT4pscTc+SY9zYpEX2sYvY9OUvFjl/W024yQd7j11ESEA/9HKw\ng1YrMNjHFZ9knWnRL3p0f0y7fSCcHeW6exMamwQ8PV2wYcdxfHEwDwDw/nMR151DKwTqG5rQy775\nb8ekV7MAAO/OH49Llxvx49kSjB7mCadect0+ZVWX0cveDqfyy/HR7p9Reakebk72WPHY7Xhoxd5W\nz3XluP95+i9wdmw+lhACc/74xTfAyxn56j8vC7g52+PF2aNRW9+Efq4OaGwS2Jp9FjcOdMeJ38pQ\nc7kBP5wuQdStfth1JB8A8MJ9ozDYxxW1dY14/I19AIAXE0fDz9NZ9wt23cIJuhoTJgRgy96zGODp\njBeTbsPvhRo4OdrhxzOl2PTlL5g8ZgDiJwRAU9sA1972ev+dCqvqsOjtr3HHzb7428Sh0Aqhdxza\nQl1eg9q6RnyceQZTQv0RHNCvxfaayw3Ymv0rAn1d8dmB3/B0fAh8PJxaPdbV9610hFYrUF3boAsk\nltKj1sohou6nrr4JJ34rxVupJ8xyvn6uvXDjoL6IGtUfXn17w04mgVYIyKSdfxjxvknDrmsb7OOK\n/0n/GXKZFL+rNQjwddP9sr+y3pbcTgI7mRTBAf3wxcE8DPFza/X4UolEF0oAYOVjt6OuoQn2chns\n5TJMuMn3un3cXXsBAG4ZqsAtQxVobNJCKpVAKpHgqZnB8OjT67p9Rg7uh+O/lqJ3rz/PJZFI4NXX\nEeryWijde+P5v9+K1//7ve5ykZuzA65ULbcD7o2+QXde4M8bQEcM7ofvfi7CIO/mX16ODnZ4MXE0\n+ro4wOWaQGEnk2LtgvHIV1djsI8rbhzkrvtL3l/ZvP8dt/hioLcL/L1cIJFIDIYSABgZ4NEigEjR\nuVkAr769AQDP/e2WVrf37iXHfarmsbh9hLfBY3UmlADNMxqWDiWmxGBCRF2qSavF/+75BdnHLnbp\ncSeHDsDUUH84Oth1eGFNmQkX5Bzi1wdLHxiDuoYm/HaxCjcM6KO3b6CvGxb9/Vb4ebb+F/W1roSO\n9rgybQ8ANw3xaLXP3ITgVv96d3ftBXV5LVyd7OEgl6G/pzN+K9C06Vf7lX+bIP++CPLv22LbAC/9\nf2HL7WQI8HXT208qkSDAp/Ug11b3RA1pMctE1onBhIg6rKGxCduyf8Web8916jgTbvJBTNgg9HVx\n6KLKLMdBLsOwa34htyZQz2yJOUkkklbD2gPThiPj6DlMHTsQAHSzN1dmgLrKkjm3oaausUuPaUjU\nqP5mOxd1HIMJEbXZheJqLN7wTYf29e7XG1PH+sOzb2/4e7lAbsf3O1qrvi4OSLgjUPd9TNhA1NY1\nYspY/y49j6/CuUuPR90DgwkR6dWk1eLB1/e2e7+4vwzCxNH9W9wnQbbLqZcciVOCLF0G9RD81CCi\nFto7KxJ5ix/iJwTAwV5mwqqIqKdgMCHq4RqbtDh4ohAf7DrVpv6uTvaYmxCMAZ4uFn/fARF1Pwwm\nRD1UQ2MTHl6Z3aa+d0UEIvJWP8ikkg4/EUNE1BYMJkQ9zNkLlVj20XdG+yXfFYJhA/q2eOyUiMjU\nGEyIeoj/bM3BsTMlRvv9PXooJtzs2+mXQBERdQSDCVE3Jv54DXeT1vDKEw/FDMfIgH4Y2N+dr/0n\nIotiMCHqplZs/gEnfy832m/1U+Oue0U4EZGlMJgQdSONTVp8+tUZZBw9b7Dfontvxe+FGgT592Uo\nISKrwmBC1A1U1zbgzW05OH2+0mC/2HGDMH3cIADN67UQEVkbBhMiG1dWdRkL3j5otN8rD4VC6d7b\nDBUREXUcgwmRDUt6Ncvg9sdiR8DN2R5OveQMJURkExhMiGyUsVDy7vzxsJfzNfFEZFsYTIhszJff\nnsPmzNMG+zx7z80MJURkkxhMiGzI6//9HqfyK/Rvf3QsPNwczVgREVHXYjAhsgG/F2rw0gffGuzz\n7yfHwdWJj/4SkW1jMCGyck+8sQ81dY16t984sC/m332zGSsiIjIdBhMiK1VVU4+5/zlgsM+IQe54\ncuZIM1VERGR6DCZEVqihUWs0lABA8l03maEaIiLzYTAhsjJaIfDwyr16t/sqnPDCvaNwuaHJfEUR\nEZkJgwmRFSmuqMWz7x7Su33x/aMwyNsVAOBgz8eBiaj7YTAhshJph/KwLftXvdsnjRmgCyVERN0V\ngwmRFfitoMpgKPnX42Ho6+JgxoqIiCxDaukCiHq62rpGLPmfowb7MJQQUU/BGRMiCzr5ezlWbP7B\nYJ/3n4swUzVERJbHGRMiC6mrbzIaSmaOH2ymaoiIrANnTIgs5NFV2Qa3r3zsdri79jJTNURE1oHB\nhMjM6hua8NTq/Qb7JN8VwlBCRD0SgwmRmb37WS7qG7V6ty+8+yYEDXQ3Y0VERNaD95gQmdGb23Jw\n7EyJwT4MJUTUk3HGhMhMDuUW4ofT+kPJv58cB0cH/i9JRD0bPwWJzOS9//vJ4HZXJ3szVUJEZL14\nKYfIDJJezbJ0CURENsEqg8m8efMQFxeHuLg4REREIC4uTrdt7dq1iI6OxuTJk3HgwJ/Lwu/btw+T\nJk2CSqXCunXrdO3nz5/HrFmzoFKpkJycjMbGRgBAfX095s2bh+joaNx11124ePGi0XMQdYQQwuD2\nwT6uGHujl5mqISKyblZ5KeeNN97Qff3aa6/BxcUFAHD27Fns2rULO3fuRGFhIRITE7Fnzx4IIbBk\nyRJ88MEH8PT0RHx8PCIjIxEQEICVK1ciMTERkydPRkpKCrZu3Yq7774bW7duhZubG/bs2YOdO3di\nxYoVeOONN3DmzJlWzyGRSCw1HGTDyqouY8HbB/Vu91M444X7RpmxIiIi62aVMyZX27VrF2JiYgAA\nmZmZmDJlCuzs7ODn5wd/f3/k5OQgJycH/v7+8PX1hVwux9SpU5GZmQkAOHz4MFQqFQAgLi4OGRkZ\numNdmYlRqVQ4fPgwACArK6vVcxB1hKFQAgD/uPdWM1VCRGQbrDqYHD16FB4eHujfvz8AQK1Ww9vb\nW7fdy8sLarW61faioiKUl5fDzc0NUmnzj6lUKqFWqwEARUVFUCqVAACZTAYXFxdUVFToPQdRe5VV\nXTa4fdQNCjjYy8xUDRGRbbDYpZzExESUlFz/6OS8efMQEdG8aNkXX3yBadOm6ba1dq1eIpFAq239\nZVVCiOv2uXJJRt+x9LUTtcfeHy7gw90/693+wLQg3D7CW+92IqKeymLBZOPGjQa3NzU14csvv8T2\n7dt1bUqlEgUFBbrvCwsL4enpCSFEi5tX1Wo1PD094e7ujqqqKmi1WkilUl1/oHkmpLCwEF5eXmhq\naoJGo4Gbm5vec7SFQuHSpn7UcbYyxoZCyUOxIxHzF+tdnM9WxtiWcYzNg+Nsm6zy5lcA+PrrrzF4\n8GB4ef35tEJERAQWLFiA2bNnQ61WIz8/H8HBwdBqtcjPz8eFCxegUCiQlpaGVatWAQBCQ0ORnp6O\nKVOmIDU1FZGRkbpjpaamIiQkBOnp6QgNDTV4jrYoLtZ08SjQ1RQKF6sf44rqOqNvdg0dprDan8MW\nxtjWcYzNg+NseqYKflYbTHbt2tXiMg4ABAYGYvLkyZg6dSrs7OyQkpICiUQCmUyGxYsXIykpCUII\nxMfHIyAgAAAwf/58JCcnY/Xq1QgKCkJ8fDwAICEhAQsXLkR0dDT69OmjCzL6zkHUFss+/A6lRu4t\nISIi/STC2EsWqM2Yzk3LFv4CMvYitbsjAhF92wAzVdN+tjDGto5jbB4cZ9PrcTMmRLbm+1+K9W6z\nt5Pi3QUTzFcMEZGNsurHhYlsyZrtx/Vue/WRsWashIjIdjGYEHWB4opag9v7ODuYqRIiItvGSzlE\nnfDNSTXWfpYLQzdqvZM83mz1EBHZOs6YEHXC+2knDYYSAJDL+b8ZEVFb8ROTqBPqG1t/6/DVpHzc\nnIiozRhMiDqoLU/av50cboZKiIi6DwYTog7QagVeWH/EaL9e9ryNi4ioPRhMiDrg4IlCFJTWGOyj\n6NPLTNUQEXUf/HOOqAPOFVUb3P7AtCDcFuRlsA8REV2PwYSonU78Voovj57Tu/0vwd64fYS3GSsi\nIuo+eCmHqB20WoFVn/xosM89UUPNVA0RUffDGROidvjqhwsGt787fzzs5TIzVUNE1P1wxoSoHTZ9\n+YvB7QwlRESdw2BCREREVoPBhKiNdh3+3eD2JQ+MMVMlRETdF+8xITJCKwTOF1Vjy96zevs49bKD\nr4eTGasiIuqeGEyIjNix/zd8cTDPYJ+Ff73ZPMUQEXVzvJRDZMShEwUGt4+6QYEBXi5mqoaIqHtj\nMCEyorSqzuD2xClBZqqEiKj7YzAhMuDS5QajfRwdeEWUiKirMJgQGfDkv/cb3D7rjkAzVUJE1DMw\nmBB1UOy4QZg0ZoClyyAi6lYYTIj0KK6oNbh9dJCnmSohIuo5GEyI9Hj23UMGt3v343tLiIi6Gu/a\nI7qGEAJzXvvKYJ8lc24zUzVERD0LZ0yIrvHD6RKD2x0d7OCrcDZTNUREPQuDCdE1SisvG9zO2RIi\nItNhMCG6xrEzhmdM3F17makSIqKeh8GE6Bonfy+3dAlERD0Wb34l+oMQAinvf2vpMoiIejTOmBD9\n4cezpThfXG3pMoiIejQGEyIA6vIa/GdrjqXLICLq8XgphwjAsg+/M9rnzbl/weW6JjNUQ0TUczGY\nEAGorjW+irBTLzmcesnNUA0RUc/FSznU4/2cb/wpnOjR/c1QCRERMZhQj/faf38w2mfUDVywj4jI\nHBhMiNrAzdne0iUQEfUIDCZERjz3t1ug6ONo6TKIiHoEBhMiI4b272PpEoiIegwGE+rRSiprLV0C\nERFdhcGEeqwmrRbPvHPI0mUQEdFVGEyoxyqrqrN0CUREdA0GE+qxnn3X+GzJU/HBZqiEiIiuYDCh\nHqktb3oFgJsCPUxcCRERXY3BhHqkbdlnLV0CERG1gmvlUI+z/8eLyD52Ue/2m4d4wKW3HBNu9jVj\nVUREBFjpjMmpU6dw1113ITY2FvHx8cjJ+XM5+qVLlyI6OhozZszAyZMnde2pqalQqVRQqVTYsWOH\nrj03NxcxMTFQqVRYtmyZrr2yshJJSUlQqVSYM2cONBqN0XNQ97Bx1ymD25+cGYzZk4MwUOlqpoqI\niOgKqwwmK1aswJNPPokdO3bgySefxIoVKwAA2dnZyM/Px549e/Dyyy8jJSUFQHPIeOutt7B161Zs\n2bIFa9as0QWNF198EcuWLcPu3buRl5eH/fv3AwDWrVuHsWPHYvfu3RgzZgzWrl1r8BzUPRw9VWRw\n+7hgbzNVQkRErbHKYCKRSHTBQqPRwMvLCwCQmZmJ2NhYAEBISAg0Gg1KSkpw4MABhIWFwcXFBa6u\nrggLC8P+/ftRXFyMS5cuITi4+cmK2NhYZGRk6I4VFxcHAIiLi0NmZqbBc1D38PaOEwa3h4f4mKkS\nIiJqjVXeY/L888/jgQcewGuvvQYhBD7++GMAQFFREZRKpa6fUqmEWq2GWq2Gt/eff+l6eXnp2q/u\nf6UdAEpLS+Hh0fzEhUKhQFlZWavnuLLPlb7Uvbk4yi1dAhFRj2axYJKYmNjqTMS8efNw8OBB/OMf\n/0BUVBTS09OxaNEibNy4EUKIFn2FEJBIJNe1AzDYbkhH9iHb8Nb240b7eLn3NkMlRESkj8WCycaN\nG/Vue+aZZ/DCCy8AACZNmqT72svLC4WFhbp+hYWF8PT0hFKpxJEjR1q0h4aGQqlUoqCgQNeuVqvh\n6ekJAPDw8EBJSQk8PDxQXFwMd3d3g+doC4XCpU39qOM6M8bf/VJscLu3hxP/DcH/js2BY2weHGfb\nZJWXcry8vPDNN9/gtttuw6FDh+Dv7w8AiIyMxKZNmzBlyhQcO3YMrq6u8PDwwLhx4/DGG29Ao9FA\nq9Xi4MGDWLBgAVxdXeHs7IycnByMHDkSO3bswL333gsAiIiIwPbt2/HQQw8hNTUVkZGRBs/RFsXF\nGuOdqMMUCheTjvHjsSN6/L+hqceYOMbmwnE2PVMFP6sMJkuWLMHSpUuh1Wrh4OCAJUuWAADGjx+P\n7OxsTJw4EY6Ojli+fDkAwM3NDY899hhmzpwJiUSCJ554Aq6uzY96pqSk4Pnnn0ddXR3Cw8MRHh4O\nAHjwwQcxd+5cbNu2DT4+Pli9erXBc5Btq7xUb3D7rDsC4ePhZKZqiIhIH4lo7aYK6hCmc9PqzF9A\nSa9mGdz+/nMRHTpud8O/Mk2PY2weHGfTM9WMiVU+LkzUlQ7lFhrvREREVoHBhLq99/7vJ4Pbn73n\nZjNVQkRExjCYULdWUlFrtM8NA/qaoRIiImoLBhPq1n45X2HpEoiIqB0YTKhb+/ak4bVxbujfx0yV\nEBFRW1jl48JEnSWEwP+k/4wfz5Ya7LeQ95cQEVkVzphQt1RT14h9P1402Cd6dH9IudwAEZFVYTCh\nbqktb+fxcOtl+kKIiKhdGEyoWyrX1BntwzcLEhFZHwYT6paWfXTU0iUQEVEHMJhQt/NTXhnqG7SW\nLoOIiDqAwYS6nZUfH2tbR17LISKyOkaDSV5eHv76178iIqJ5kbPc3Fy8+eabJi+MqCNKKy+3ua+v\ngqsJExFZG6PB5MUXX8Sjjz4KF5fmVQSDgoKQnp5u8sKIOuKrHy4Y7fPWvHAs+vutGD7Q3QwVERFR\nexgNJhqNBuHh4ZD88b4HqVQKuVxu8sKI2ksrBHYe/t1oP0cHOwT6uZmhIiIiai+jb36VyWRoaGjQ\nBRO1Wg2plLemkHXR1NTj1U3fG+234tHbzVANERF1lNGEcc899+CJJ55AeXk53nzzTdxzzz1ISkoy\nR21EbfbR7p9RUFpjtF8/vlSNiMiqGZ0xiY2NhZ+fH7766ivU1tbitddew6hRo8xRG1Gbff9LidE+\n/7jvVjNUQkREnWE0mHz22WeYMWNGizBypY3IWmiNvIN+1RNh6OPsYKZqiIioo4xeyvnggw/a1EZk\nKcZCCQCGEiIiG6F3xuT48ePIyclBeXk5Nm3apGuvrq5GQ0ODWYojaotNX/5i6RKIiKiL6A0marUa\nJ06cQG1tLU6cOKFrd3JywvLly81SHFFbfPW98XeXEBGRbdAbTKKiohAVFYUDBw5g3Lhx5qyJiIiI\neiijN7+OGzcOv/76K06dOoX6+npde2xsrEkLI2qLo6eKjPaZmxBshkqIiKgrGA0mH374IT755BMU\nFxdj5MiROHr0KEaPHs1gQhb32KpsXK5vMtgneVYIRgzuZ6aKiIios4w+lfPpp59iy5Yt8Pb2xoYN\nG7BlyxY4OXHxM7I8Y6EEAHw8+N8qEZEtMRpM7O3t0bt3b2i1WgghMHToUOTl5ZmhNKLOWfbgGLi7\n8k2vRES2xOilHEdHRzQ0NGDYsGFYsWIFvL29odVqzVEbUavKNXV4c1uO0X7e/ThbQkRka4zOmKSk\npKChoQHPPfccKisr8e233+L11183R21E1zn5eznmv/U18go1li6FiIhMwOiMydChQwEAvXv3xrJl\ny0xeEJEh3/9S3KZ+ceGDTVwJERGZgtFgUlpaiv/93/9Ffn4+Ghsbde2rV682aWFErcn87nyb+jnY\nGZ0MJCIiK2Q0mDz22GMYPnw4xo4dC5lMZo6aiDpNzmBCRGSTjAaT2tpapKSkmKMWIoNKK2vb3Pf2\nkd4mrISIiEzF6J+VISEh+Pnnn81RC5FeDY1azH55T5v6rn/2DjjIObtHRGSLjM6Y3H333fj73/8O\npVIJB4c/l47funWrSQsjutrr//2+zX2lEokJKyEiIlMyGkwWLlyIRx55BMOHD+c9JmQxZy9WGe0z\nb1YI+vGFakRENs1oMHFwcMCcOXPMUQvRdU7mleFiaU2b+o7kmjhERDbPaDD5y1/+gn379iE8PNwc\n9RDpNDZpseLjY5Yug4iIzMhoMPn000+xbt06ODk5wd7eHkIISCQSHDp0yBz1UQ+2OeN0m/s69TL6\nnzIREdkAo5/m27ZtM0cdRNf56ocLbe4rlfKGVyKi7sBoMPH19TVHHUSdIuGTOERE3YLeYLJw4UKs\nWLECM2fObPVDn48LkymVVV1uV39OmBARdQ96g8n9998PAHj22WfNVgwRAGR9fx7/u+eXNvUdF+yN\nAzkFnDEhIuom9AaTESNGAAAKCgowY8aMFts+++wz01ZFPZa6rKbNoeTBmOH4raD5/Sa8+ZWIqHsw\n+kr6Dz74oE1tRF3h+XWH29w3JKAfpocNQthIJR6/c6QJqyIiInPR+2fm8ePHkZOTg/LycmzatEnX\nXl1djYaGBrMURz2HVgg88NpX7dqndy85AGDO1OGmKImIiCxAbzBRq9U4ceIEamtrceLECV27k5MT\nli9fbpbiqOf49mRRm/uGh3gjfkKgCashIiJL0RtMoqKiEBUVhQMHDmDcuHHmrAmnTp3Ciy++iJqa\nGvj6+mLlypVwcnICAKxduxbbtm2DTCbDP/7xD11t+/btwyuvvAIhBGbOnImHHnoIAHD+/HkkJyej\nsrISN954I15//XXY2dmhvr4ezz77LHJzc9G3b1+88cYb8PHxMXgOMo3/O5iH1H2/tqnvQ9OHI3S4\n0sQVERGRpRi9x6SqqgrV1dUAgNWrV2POnDktZlBM4YUXXsCCBQvw+eefY+LEiVi/fj0A4MyZM9i1\naxd27tyJ9957Dy+99BKEENBqtViyZAk2bNiAL774AmlpaTh79iwAYOXKlUhMTMTu3bvh4uKie8x5\n69atcHNzw549e3D//fdjxYoVBs9BXauhsQmV1XWoqK5rcygBgNuCvExYFRERWZrRYPLOO+/A2dkZ\nOTk5OHDfc+I6AAAgAElEQVTgAGJjY7F06VKTFpWXl4dRo0YBAG6//Xbs2bMHAJCVlYUpU6bAzs4O\nfn5+8Pf3R05ODnJycuDv7w9fX1/I5XJMnToVmZmZAIDDhw9DpVIBAOLi4pCRkQEAyMzMRFxcHABA\npVLh8OHDBs9BXWvum19j3pqvkbzm63btJ+VjwURE3ZrRYGJn13y15+uvv0ZCQgJiYmJQV1dn0qKG\nDBmCrKwsAMCuXbtQWFgIoPm+F29vb10/Ly8vqNXqVtuLiopQXl4ONzc3SKXNP6ZSqYRarQYAFBUV\nQalsviQgk8ng4uKCiooKveegzquubcDmjNPIOVuK2rpGS5dDRERWyOjLHyQSCXbu3ImdO3fi7bff\nBoAueSonMTERJSUl17XPmzcPr7zyCpYuXYq33noLERERkMubn75o7ZKKRCKBVqtt9RxCiOv2ufIi\nLn3H0tfeFgqFS5v69UTF5bV4anVz2Pzy6LkOH4djbHocY9PjGJsHx9k2GQ0mixcvxnvvvYf4+Hj0\n798feXl5GDNmTKdPvHHjRoPbN2zYAKD5sk52djaA5hmPgoICXZ/CwkJ4enpCCIGLFy/q2tVqNTw9\nPeHu7o6qqipotVpIpVJdf6B5JqSwsBBeXl5oamqCRqOBm5ub3nO0RXGxpm0/fA9yub4RT/57P5q0\nnbtPJ2X2aPh6u3GMTUyhcOEYmxjH2Dw4zqZnquBn9FLOzTffjLffflv3ivqBAwdi8eLFJinmirKy\nMgCAVqvFO++8g7vvvhsAEBERgZ07d6K+vh7nzp1Dfn4+goODMXLkSOTn5+PChQuor69HWloaIiMj\nAQChoaFIT08HAKSmpuraIyIikJqaCgBIT09HaGiowXNQ+6jLanDmfCUeW7Wv06EEAFyd7OHt4dQF\nlRERkTUzOmOSl5eH559/Hmq1GllZWcjNzUVWVhaefPJJkxX1xRdfYNOmTZBIJIiOjsadd94JAAgM\nDMTkyZMxdepU2NnZISUlBRKJBDKZDIsXL0ZSUhKEEIiPj0dAQAAAYP78+UhOTsbq1asRFBSE+Ph4\nAEBCQgIWLlyI6Oho9OnTB6tWrTJ4DmpJCIFyTR0c7GWQy6Sws5NCXVaD/2acRu5vZV1+Pv4TEBH1\nDBJh5FnY2bNnIykpCf/617/w2WefQavVIiYmBmlpaeaq0Wb0hGlDIQR+PFuK/2w175NK6xZOgLeS\nl3JMjdPfpscxNg+Os+lZ7FKORqNBeHi4btZAKpXqbkal7k8Igaf/sx9Jr2bhXFE1vjiYZ/ZQsv7Z\nO2AnM/qfKhERdQNGL+XIZDI0NDTogolardY9fkvdT7mmDl/9cB6Tx/ijsKwGS/7nqG5byvvfmKWG\nAB9XhAR6YPsfL17ju0uIiHoOo8HknnvuwRNPPIHy8nK8+eab2LFjB+bNm2eO2sgMfr1YhT3f5uOm\nQA9s+vIXSCQSVNc24IuDv1usJplUwhkSIqIeymgwiY2NhZ+fH7766ivU1tbitdde072VlWzf0g+b\nZ0S+accieqYU6OuG+ycPw/niSwCAAZ7OFq6IiIjMyWgwAYBRo0YxjHQDdfVNqK5twLniauScLcWM\nsIGWLuk6i+69FQDg5d4bc6YGYeTgfhauiIiIzKlNwYRs18adJ1FWdRmDfFyvuzyz94cLFqrKOKlE\ngrCR3sY7EhFRt8Jg0s0IIfD9L8UY4tcHx38txf6c5rfY5uaVW7gyIiIi4xhMuplTv5fjrdQTcOpl\nB3fXXpYuh4iIqF0MBpOmpiakpaXh1KlTAIAbbrgB06ZNg0wmM0txdL3vfi5GHxd7BPi4Ie1QHnrZ\n2yHyVj/d9tKq5pWfL11uxKXL1RaqsmM8+zpaugQiIrIwvcGksLAQSUlJcHFx0a0V89///hfvvvsu\n3n//fXh78/q/JbyVehwA8P5zEdiW3fyej2NnSpD7WxkG+7ji14tVliyvXZ756814ffMPuu/56n8i\nItIbTJYvX45Zs2Zh9uzZLdo/+OADLF++HP/5z39MXRtd4+rVAxqbtLqvr6xNY0uhZOVjt8PdtRd6\n2ctwub4JANBfwUX6iIh6Or1vsfrpp5+uCyVA89o5J0+eNGVN1Iq6+ibMee0r3fcPrdhruWK6wJX7\nX5772y0YM9wLd0cE4v7JwyxcFRERWZreGRNOq1uPH8+UYLWZ16fpKFcnewxSuuDHs6V6+4QO99J9\nPcDLBQ9Pv9EcpRERkQ3QO2MyYMAA7Nmz57r23bt3Y8CAASYtilqylVACAMP9++Kp+GC929cumIAH\nY4absSIiIrIlemdMnnnmGSQlJWH37t0ICQkBABw7dgzffPMN3n//fbMVSLZlws2+kEgk+NfjYUg7\nlIes71u+xE1uxzVwiIhIP72/JYYOHYq0tDQMHjwYR48exdGjRxEQEIC0tDQMHTrUnDX2OHX1Tdif\ncxF1DU0tbnK1Bfby5v+k+ro4YOLo/hauhoiIbI3B95i4ubnh8ccfN1ct9IdPvzqDr364gPNFlzBl\nrL+ly2kXud2f77jp4+RgwUqIiMgW6Q0m1dXV+Pjjj+Hm5obY2FisXLkSBw8exMCBA7Fo0SK+x8SE\nzhU1vxjtQkk1bOUW5EV/vxU/5ZXBp19vXZtM1ly9vZ0Uz//9VtTUNVqqPCIishF6g8miRYsgk8lQ\nW1uLbdu2YciQIVi4cCGOHDmClJQUrFu3zpx19ihnLlQCAH7KK0fGd+csXE3bBPq5IdDPrUWbnUyK\nlY/dDkcHOzg6cPUDIiIyTu9vi7NnzyItLQ0NDQ0YN24cNm/eDIlEgvDwcEybNs2cNfYohWU1Lb6/\ndkVgS5qbEIx/b2l+Qqi3g12bZkC4Xg8REbWH3ptf7e3tAQByuRze3t4t3msil8tNX1kP9VuBdb69\ndekDY1rcP/KvJ8Lw7yfHWbAiIiLqjvTOmGg0GmRnZwMALl26pPsaaL7/hHoWHw8nVF6q133vIJfB\nQS7Dqw+HoqK63sCeREREbac3mHh7e2P9+vUAAKVSqfv6yvfUNbRagZq6RjQ0avHuZydw+nylpUtq\nF8++veHZt7fxjkRERG2gN5h89NFHenfSaDQmKaYneuLf+3SL2BEREfV0HXoNZ0xMTFfX0SMJIaw6\nlCz6+60tvnfqxSdriIjItDr0m0YI0dV19EibM05bugSDrn38d4CXC+ZMDbqunYiIqKt0aMaEKw93\nTpNWi399cgwZ3523dCmYdUdgu/qHjfSGF+8pISIiE9E7Y3LmzBm9OzU28g2enfHrxSrk/lZm6TIA\nAH6eTgCaF9draPxzXZ5hA/pYqiQiIurB9AaThx56SO9OV95xQrbP18MZKbNHo59bLyz76DuMG6nE\n7SO84erEd9UQEZH56Q0mWVlZ5qyjxxBCoKTiskVruG/SDfgw/Wfd9/5KFwDA8odCr+s7cnA/HP+1\n1Gy1ERFRz6Y3mFy8eBE+Pj6tbsvNzcWNN95osqK6s4yj57E503I3vb73zATIpFJdMDF2u9DchGDw\nXmciIjIXvTe/Pv7447qv4+PjW2x74YUXTFdRN2fJUAIAMmnLf3JjtzFLJBJIpbzZmYiIzEPvjMnV\njwRfe7MrHxduP63WOsdMJuvQg1lEREQmoTeYXP1I8LWPB/Nx4fZb+M5B1JnpZWp3RQTikyz9T1UB\nwAv3jUJ+kQbOjrzJlYiIrIfeYFJXV4ezZ89CCNHi6yvbqH3KNeYbM9VtA1oEk9uCPPHNyaIWfQb7\nuGKwj6vZaiIiImoLvcHk8uXLePDBB3XfX/01Z0za7pdzFVD0cbTY+cNGKOHl3vu6YEJERGSN+Liw\nCWlq6vHqpu+N3mDaWU/cORJrth9vddutwzxxrqjaxBUQERF1Dd75aEK1dc03DZv6ttcRg9xNfAYi\nIiLzYDDpJhLuCNC7zc+j+bXzIwYzwBARkXVjMDElM96LM3mMf+slALhpiAfm330THosdYbZ6iIiI\nOkLvPSbUeea+Rfix2BFwdWq5jpFE0nyz8o0DOVtCRETWjzMmNuq9ZyZc1zZqmCeG9ueqwEREZLs4\nY2JCF0sumezYMqkUs+4IxMnfyyG3M5Qv+Wg3ERHZDgYTE1q9Ncekx580ZgAmjRlgsA9fOUNERLaE\nl3JsxPCBfS1dAhERkckxmNiIuQkhuq/HBXu3eT9OmBARkS2xWDBJT0/HtGnTEBQUhNzc3Bbb1q5d\ni+joaEyePBkHDhzQte/btw+TJk2CSqXCunXrdO3nz5/HrFmzoFKpkJycrFsNub6+HvPmzUN0dDTu\nuusuXLx4scPnsDS7q1YBvjf6hrbvyGRCREQ2xGLBZOjQoVizZg1Gjx7dov3s2bPYtWsXdu7ciffe\new8vvfQShBDQarVYsmQJNmzYgC+++AJpaWk4e/YsAGDlypVITEzE7t274eLigq1btwIAtm7dCjc3\nN+zZswf3338/VqxYAQA4c+ZMu8/RXmVVlzsxOs1cessxxM8NLr1brgDM+0aIiKi7slgwGTx4MAYO\nHKhbsfiKzMxMTJkyBXZ2dvDz84O/vz9ycnKQk5MDf39/+Pr6Qi6XY+rUqcjMzAQAHD58GCqVCgAQ\nFxeHjIwM3bHi4uIAACqVCocPHwbQvA5Qe8/RXq9u+r5D+11t5vgAPPe3W/DGk+Pave+V95n0cXbo\ndB1ERETmYnVP5ajVatx000267728vKBWqyGEgLe3d4v248ePo7y8HG5ubpBKmzOWUqmEWq0GABQV\nFUGpVAIAZDIZXFxcUFFR0e5ztFdxRS1KKjs/YxIe4gOgY1djXkocjfyiavgpnDtdBxERkbmYNJgk\nJiaipKTkuvZ58+YhIiKi1X2unUEBmt9cqtVq9fa/dh/JH9c69B2rvedoj5rLjXj23UOdPo4hbbmU\n4+bsgJGcLSEiIhtj0mCycePGdu+jVCpRUFCg+76wsBCenp4QQrS4eVWtVsPT0xPu7u6oqqqCVquF\nVCrV9QeaZzwKCwvh5eWFpqYmaDQauLm5tfscbaVQuCBm/mft/pkNHa/Vdg8XyGQ984EqfWNCXYdj\nbHocY/PgONsmq7iUc/UMRkREBBYsWIDZs2dDrVYjPz8fwcHB0Gq1yM/Px4ULF6BQKJCWloZVq1YB\nAEJDQ5Geno4pU6YgNTUVkZGRumOlpqYiJCQE6enpCA0N7fA52uJiQWUXjgpQXKxptb2kpBpSac+7\nA1ahcNE7JtQ1OMamxzE2D46z6Zkq+FksmGRkZGDJkiUoLy/HI488gmHDhmH9+vUIDAzE5MmTMXXq\nVNjZ2SElJQUSiQQymQyLFy9GUlIShBCIj49HQEAAAGD+/PlITk7G6tWrERQUhPj4eABAQkICFi5c\niOjoaPTp00cXMjpyjrZ4eOXeTo2Jo4MMtXVNAAA/hZP+jj0vkxARUQ8hEa3dcEHtVl1Tj78u3tWp\nY7g52aPyUj0AIHp0f9wdOaTF9qRXswAA65+9A9Ie+Mww/wIyPY6x6XGMzYPjbHrdbsaku3nglYxO\nH2P8TT6IuNUP+3+8iOjR/fX263mRhIiIegoGky5yqbah08eYMW4QJBIJpo4daLCfpAfOlhARUc/Q\nMx/tsEKJk4cxcBARUY/HYGItmEmIiIgYTKzFYB83S5dARERkcQwmVsLXw8DjwX+Q2/Gfi4iIujfe\n/GpD/v3kOFyub7J0GURERCbDYGJDHB3s4OjAfzIiIuq+eG3Agvy9uI4DERHR1RhMLEjK0SciImqB\nvxot6NHYEbCXS5Ewoe3r8RAREXVnvGHBgjzcHPHu/AmWLoOIiMhqcMbEQqbd7m/pEoiIiKwOg4kF\nKPr0wp3hvHxDRER0LQYTC1j2YKilSyAiIrJKDCYWIJVyYRwiIqLWMJhYgJSrCBMREbWKwYSIiIis\nBoMJERERWQ0GEyIiIrIaDCZERERkNRhMiIiIyGrwlfRmdOPAvrg7aqilyyAiIrJanDExo/gJgfD1\ncLJ0GURERFaLwcRMHo8bCX+li6XLICIismoMJmZy6w0KS5dARERk9RhMiIiIyGrw5lcTCw7oh7kJ\nIZYug4iIyCZwxsTEZFywj4iIqM0YTIiIiMhqMJiY2OggT0uXQEREZDMYTExM4eZo6RKIiIhsBoOJ\niQX4ulm6BCIiIpvBYGJCfV0cLF0CERGRTWEwMaEnZ460dAlEREQ2hcHEhAYqXS1dAhERkU1hMCEi\nIiKrwWBiIkr33pYugYiIyOYwmJhIeIiPpUsgIiKyOQwmREREZDUYTIiIiMhqMJiYSEhgP0uXQERE\nZHMYTEzgzvDB8O7nZOkyiIiIbA6DiQk495ZbugQiIiKbxGBiAv5eLpYugYiIyCYxmBAREZHVYDAh\nIiIiq2GxYJKeno5p06YhKCgIubm5uvaKigrcd999uPnmm7F06dIW++Tm5iImJgYqlQrLli3TtVdW\nViIpKQkqlQpz5syBRqPRbVu6dCmio6MxY8YMnDx5UteempoKlUoFlUqFHTt2GD1He8ikkg7tR0RE\n1NNZLJgMHToUa9aswejRo1u0Ozg4YO7cuXjuueeu2+fFF1/EsmXLsHv3buTl5WH//v0AgHXr1mHs\n2LHYvXs3xowZg7Vr1wIAsrOzkZ+fjz179uDll19GSkoKgOYg89Zbb2Hr1q3YsmUL1qxZowsz+s5B\nREREpmexYDJ48GAMHDgQQogW7Y6Ojrjllltgb2/for24uBiXLl1CcHAwACA2NhYZGRkAgMzMTMTF\nxQEA4uLikJmZqWuPjY0FAISEhECj0aCkpAQHDhxAWFgYXFxc4OrqirCwMOzfv9/gOdpD0cex3fsQ\nERGRDd1jolaroVQqdd97eXlBrVYDAEpLS+Hh4QEAUCgUKCsrAwAUFRW12EepVEKtVkOtVsPb2/u6\nYxk6R1tNDh0ARwe79v+AREREBJP+Bk1MTERJScl17fPmzUNERES7jnXtzAoASCSG7+W4dh8hBCQS\nid5jdeQc15LLbCbrERERWR2TBpONGzd22bGUSiUKCgp036vVanh6egIAPDw8UFJSAg8PDxQXF8Pd\n3R1A84xHYWGhbp/CwkJ4enpCqVTiyJEjLdpDQ0MNnqOtevd2gELB95iYCsfW9DjGpscxNg+Os22y\nimsOrc1UXNuuUCjg7OyMnJwcjBw5Ejt27MC9994LAIiIiMD27dvx0EMPITU1FZGRkQCAyMhIbNq0\nCVOmTMGxY8fg6uoKDw8PjBs3Dm+88QY0Gg20Wi0OHjyIBQsWwNXVVe852qqmpg7FxRrjHandFAoX\njq2JcYxNj2NsHhxn0zNV8LNYMMnIyMCSJUtQXl6ORx55BMOGDcP69esBNAeNS5cuoaGhAZmZmdiw\nYQMCAgKQkpKC559/HnV1dQgPD0d4eDgA4MEHH8TcuXOxbds2+Pj4YPXq1QCA8ePHIzs7GxMnToSj\noyOWL18OAHBzc8Njjz2GmTNnQiKR4IknnoCrqysA6D0HERERmZ5E6JuuoHaJmf8ZAGB62EDE/mWw\nhavpnvgXkOlxjE2PY2weHGfTM9WMCe/U7GLtvVmWiIiI/sRgQkRERFaDwaSLcb6EiIio4xhMuhqT\nCRERUYcxmHSx0cPa994TIiIi+pNVvMeku9jw7B28+ZWIiKgTOGPSRcKCfRhKiIiIOonBpIs8e98o\nS5dARERk8xhMughnS4iIiDqPwYSIiIisBoMJERERWQ0GEyIiIrIaDCZERERkNRhMiIiIyGowmBAR\nEZHVYDAhIiIiq8FgQkRERFaDwYSIiIisBoMJERERWQ0GEyIiIrIaDCZERERkNRhMiIiIyGowmBAR\nEZHVYDAhIiIiq8FgQkRERFaDwYSIiIisBoMJERERWQ0GEyIiIrIaDCZERERkNRhMiIiIyGowmBAR\nEZHVYDAhIiIiq8FgQkRERFaDwYSIiIisBoMJERERWQ0GEyIiIrIaDCZERERkNRhMiIiIyGowmBAR\nEZHVYDAhIiIiq8FgQkRERFaDwYSIiIisBoMJERERWQ0GEyIiIrIaDCZERERkNRhMiIiIyGowmBAR\nEZHVsFgwSU9Px7Rp0xAUFITc3Fxd+8GDB3HnnXdi+vTpmDlzJg4fPqzblpubi5iYGKhUKixbtkzX\nXllZiaSkJKhUKsyZMwcajUa3benSpYiOjsaMGTNw8uRJXXtqaipUKhVUKhV27Nhh9BxERERkehYL\nJkOHDsWaNWswevToFu3u7u5Yu3YtPv/8c7z66qt45plndNtefPFFLFu2DLt370ZeXh72798PAFi3\nbh3Gjh2L3bt3Y8yYMVi7di0AIDs7G/n5+dizZw9efvllpKSkAGgOMm+99Ra2bt2KLVu2YM2aNbow\no+8cREREZHoWCyaDBw/GwIEDIYRo0T5s2DAoFAoAwJAhQ1BfX4+GhgYUFxfj0qVLCA4OBgDExsYi\nIyMDAJCZmYm4uDgAQFxcHDIzM3XtsbGxAICQkBBoNBqUlJTgwIEDCAsLg4uLC1xdXREWFob9+/cb\nPAcRERGZnlXfY5Keno7hw4dDLpdDrVZDqVTqtnl5eUGtVgMASktL4eHhAQBQKBQoKysDABQVFbXY\nR6lUQq1WQ61Ww9vb+7pjGToHERERmZ6dKQ+emJiIkpKS69rnzZuHiIgIg/uePn0aq1atwvvvvw8A\n182sAIBEIjF4jGv3EUJAIpHoPVZHzkFERERdx6TBZOPGjR3ar7CwEE888QRef/11+Pn5AWie7Sgo\nKND1UavV8PT0BAB4eHigpKQEHh4eKC4uhru7O4DmGY/CwsIWx/X09IRSqcSRI0datIeGhho8R1so\nFC4d+nmp7TjGpscxNj2OsXlwnG2TVVzKuXqmQqPR4OGHH8aCBQtw00036doVCgWcnZ2Rk5MDIQR2\n7NiByMhIAEBERAS2b98OoPlpmyvtkZGRuidujh07BldXV3h4eGDcuHE4ePAgNBoNKisrcfDgQYwb\nN87gOYiIiMj0JKK16xdmkJGRgSVLlqC8vByurq4YNmwY1q9fj3feeQfr1q3T3RgrkUiwYcMGuLu7\n48SJE3j++edRV1eH8PBwvPDCCwCAiooKzJ07FwUFBfDx8cHq1avh6uoKAHj55Zexf/9+ODo6Yvny\n5bjxxhsBANu3b8e7774LiUSCRx99VHeTrL5zEBERkelZLJgQERERXcsqLuUQERERAQwmREREZEUY\nTIiIiMhqMJh00r59+zBp0iSoVCqsW7fO0uVYvUWLFuH2229HTEyMro1rHXWtwsJC3HfffZgyZQpi\nYmLw4YcfAuA4d7X6+nokJCQgNjYWMTExWLNmDQDg/PnzmDVrFlQqFZKTk9HY2KjrP2/ePERHR+Ou\nu+7CxYsXdcdau3YtoqOjMXnyZBw4cEDXzs+XZlqtFnFxcXjkkUcAcIy7WkREBKZPn47Y2FjEx8cD\nsPDnhaAOa2pqElFRUeL8+fOivr5eTJ8+XZw5c8bSZVm1b7/9Vvz0009i2rRpurbXX39drFu3Tggh\nxNq1a8WKFSuEEELs3btXPPjgg0IIIY4dOyYSEhKEEEJUVFSIyMhIUVVVJSorK3VfCyFEfHy8+PHH\nH4UQQjzwwANi3759ZvvZrEVRUZH46aefhBBCVFdXi+joaHHmzBmOswnU1NQIIYRobGwUCQkJ4tix\nY+Lpp58WO3fuFEII8c9//lNs3rxZCCHEpk2bREpKihBCiLS0NDF37lwhhBCnT58WM2bMEA0NDeLc\nuXMiKipKaLVafr5cZePGjWL+/Pni4YcfFkIIjnEXi4iIEBUVFS3aLPl5wRmTTsjJyYG/vz98fX0h\nl8sxdepU3To91LpRo0bpHuW+gmsddS2FQoGgoCAAgJOTEwICAqBWqznOJuDo6Aig+S/1xsZGSCQS\nHDlyBCqVCkDzOLe2ppdKpdKtnJ6VlYUpU6bAzs4Ofn5+8Pf3R05ODj9f/lBYWIjs7GwkJCTo2g4f\nPswx7kJCCGi12hZtlvy8YDDphNbW3CkqKrJgRbaprKyMax2ZyPnz53Hq1CmEhIRwTSkT0Gq1iI2N\nRVhYGMLCwtC/f3+4urpCKm3+aL0ylkDLcZbJZHBxcUFFRYXBcebnC/DKK6/gmWee0S0PUl5eDjc3\nN45xF5JIJJgzZw5mzpyJLVu2ALDsGnQmfSV9dyf4ChiTunZ8Bdc6apdLly7hqaeewqJFi+Dk5KR3\nLDjOHSeVSrFjxw5UV1fj8ccfx9mzZ6/rc2Vs2jue1/4F2xPt3bsXHh4eCAoK0i0jIoS4bsw4xp3z\n8ccf68JHUlISBg0aZNHPC86YdIJSqWxxc1V719ahZv369dMt9tjWtY6uHver2zuz1lF30tjYiKee\negozZsxAVFQUAI6zKTk7O2P06NH48ccfUVVVpfuFd2XMgJbj3NTUBI1GAzc3t+vGU9/498Rx/v77\n75GVlYXIyEjMnz8fR44cwSuvvAKNRsMx7kIKhQIA4O7ujqioKOTk5Fj084LBpBNGjhyJ/Px8XLhw\nAfX19UhLS+PaOm1wbYLmWkddb9GiRQgMDMT999+va+M4d62ysjLdkwqXL1/GoUOHEBgYiDFjxiA9\nPR1Ay3GOiIhAamoqACA9PR2hoaG69p07d6K+vh7nzp1Dfn4+goOD+fkCIDk5GXv37kVmZiZWrVqF\nMWPGYOXKlRzjLlRbW4tLly4BAGpqanDgwAEMHTrUsp8X7b9/l66WnZ0toqOjxcSJE8XatWstXY7V\nS05OFmFhYeLGG28U48ePF1u3bhUVFRXi/vvvF9HR0WL27NmisrJS1/+ll14SUVFRIiYmRpw4cULX\nvm3bNjFx4kQRHR0tUlNTde3Hjx8X06ZNExMnThRLliwx689mLY4ePSqGDRsmpk+fLmbMmCFiY2NF\ndna2KC8v5zh3oVOnTonY2Fgxffp0MW3aNPH2228LIYTIz88X8fHxIjo6Wjz99NOivr5eCCFEXV2d\neOqpp8TEiRNFQkKCOHfunO5Y7777roiKihKTJk0S+/fv17Xz8+VPR44c0T2VwzHuOvn5+brPimnT\npirmdvkAAAOLSURBVOnGwJKfF1wrh4iIiKwGL+UQERGR1WAwISIiIqvBYEJERERWg8GEiIiIrAaD\nCREREVkNBhMiIiKyGgwmRGRyEREROHPmDFJTU/H77793+fE1Gg3Wr1/fou2FF17Ad9991+XnIiLT\nYjAhIpO7smbG9u3bkZeX1+79jb1uqbKy8rpgsnTpUtx6663tPhcRWRZfsEZEJhcREYE5c+Zg5cqV\n8PDwgLOzM5555hmMHTsW69evx549e9DY2AgvLy8sXboU/fr1w5o1a3D69GlUV1ejoKAAn3zyCd5+\n+20cPXoUDQ0N6Nu3L1555RV4e3vj4Ycfxtdff40hQ4agV69e2Lx5M+6991488MADGD9+PEpLS5GS\nkoL8/HwAQFJSkm7p9oiICMTGxuLgwYMoLi5GUlIS/va3v1lyuIh6NK4uTEQmJ5FIMGbMGIwYMUIX\nFgDg888/R35+Pj799FMAwObNm7F8+XKsXLkSAHD8+HGkpqbCzc0NAPDwww/j2WefBQBs2bIFK1as\nwKpVq/DPf/4T8fHxunVSrrV06VIMHToUa9asQXFxMeLi4jBixAgEBgYCaF7r5uOPP8aFCxcwbdo0\n3HnnnXB0dDTpmBBR6xhMiMhisrKykJubq5u9aGpqgqurq257eHi4LpQAwN69e7F582bU1NSgsbHR\n6PLpVxw8eBDPPfccgOaVVCdMmIAjR47ogsnUqVMBAL6+vujTpw8KCwsxaNCgLvkZiah9GEyIyGKE\nEHj0/9u3YxSFgTAMw18aAxIISCpbIXgEsRFSxSIKNkLwAvaxEqz1FHZJ5yFyAVtPYGMjIYUIhq0M\nuOsuFspO8T5VmBkmM93HPzPzuSaTydP+ZrNZfx+PR63Xa+12O7Xbbe33eyVJ8tJ/LMv6M8TYtv0w\n9na7vbgDAO/G5VcAH3e/yuY4joqiqNuDIFCapnXb9XrV4XB4OkdZlmo0GvI8T1VVKcuyus9xHF0u\nl18DRb/fr4+LTqeT8jxXr9d7y94AvBcVEwAfd69WTKdTbTYbbbdbLRYLjcdjnc9nzWYzWZalqqoU\nx7G63e6POXzfVxiGGg6HarVaGgwG9XNg13UVRZGiKJLrusqy7KFCslwutVqtNBqNJElJkqjT6Tys\n7ftaAfwPXuUAAABjcJQDAACMQTABAADGIJgAAABjEEwAAIAxCCYAAMAYBBMAAGAMggkAADAGwQQA\nABjjC39iFFDBf2QpAAAAAElFTkSuQmCC\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x7f2fef23f940>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig, ax = plt.subplots(figsize=(8, 6))\n",
"\n",
"ax.plot(advi_fit.elbo_vals);\n",
"\n",
"ax.set_xlabel('Iteration');\n",
"ax.set_ylabel('ELBO estimate');"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can look at the top words per topic."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"post_topic_word = (pm.distributions\n",
" .transforms\n",
" .stick_breaking\n",
" .backward(advi_fit.means['topic_word_stickbreaking_'])\n",
" .eval())"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([['charles', 'diana', 'prince', 'royal', 'queen', 'family',\n",
" 'marriage', 'church', 'public', 'king'],\n",
" ['church', 'years', 'catholic', 'told', 'people', \"n't\",\n",
" 'government', 'former', 'president', 'peace'],\n",
" ['mother', 'teresa', 'home', 'heart', 'doctors', 'hospital',\n",
" 'order', 'told', 'sunday', 'work'],\n",
" ['pope', 'vatican', 'surgery', 'hospital', 'operation', 'doctors',\n",
" 'paul', 'roman', 'since', 'last'],\n",
" ['french', 'france', 'visit', 'church', 'against', 'during', 'pope',\n",
" 'paris', 'catholic', 'later']], \n",
" dtype='<U18')"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vocab[post_topic_word.argsort(axis=1)[:, -10:][:, ::-1]]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"While we have restricted ourselves to 99 documents and 100 words, this approach can be made scalable through minibatches. See [this notebook](https://gist.github.com/taku-y/66c9613ab29a150e4493b899a6507354) for inspiration. (I have not worked out the details of `pymc3`'s minibatched ADVI API yet.)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1"
},
"widgets": {
"state": {},
"version": "1.1.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment