Skip to content

Instantly share code, notes, and snippets.

@pilipolio
Last active December 16, 2015 17:41
Show Gist options
  • Save pilipolio/5472470 to your computer and use it in GitHub Desktop.
Save pilipolio/5472470 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "GaussianMixtureModels"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Gaussian mixture models and unsupervised clustering\n",
"===================================================\n",
"\n",
"Model and toy data set\n",
"----------------------\n",
"\n",
"A gaussian mixture model in $\\mathbb{R}^d$ is defined by the following parameters:\n",
"\n",
" - the mixture's number of components $m$;\n",
" - the mixture's components probabilities $\\pi_k$ for $k= 1, \\ldots, m$;\n",
" - the components' mean vectors $\\mu_k = (\\mu_{k,1}, \\ldots, \\mu_{k,d})$ for $k= 1, \\ldots, m$;\n",
" - the components'covariance matrixes $\\Sigma_k \\in \\mathcal{M}_{d,d}$ for $k= 1, \\ldots, m$;\n",
"\n",
"A data-set of $n$ observations stemming from that model have the following hidden parameters:\n",
"\n",
"- $(a_i)_{i=1, \\ldots,n}$ the latent allocation of each point $n$ to one of the $m$ components.\n",
"\n",
"$$ \\pi \\longrightarrow \\underbrace{A \\rightarrow X}_{n} $$\n",
"\n",
"$$ A | \\pi \\sim \\text{Mult}(k, \\pi)$$\n",
"\n",
"$$ X | A=k \\sim \\mathcal{N}(\\mu_k, \\Sigma_k)$$\n"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"d = 2\n",
"m = 3\n",
"n = 100\n",
"\n",
"def generate_toy_gmm_data_set(d, m, n):\n",
" mu_m = np.random.uniform(0, 10, size= m * d).reshape((m,d))\n",
" prob_m = np.random.uniform(0, 1, size= m)\n",
" prob_m /= np.sum(prob_m)\n",
" assignements_n = np.where(np.random.multinomial(1, prob_m, size = n))[1]\n",
" #cov_m = np.random.uniform(0, 3, size= m * d * d).reshape((m, d, d))\n",
" iid_n = np.random.normal(size= n * d).reshape((n, d))\n",
" x_n = mu_m[assignements_n] + iid_n\n",
" return mu_m, prob_m, assignements_n, x_n\n",
"\n",
"mu_m, prob_m, assignements_n, x_n = generate_toy_gmm_data_set(d, m, n)\n",
"print 'Generated n={} points from a mixture of m={} gaussians prob_m = {}'.format(n, m, prob_m)\n",
"\n",
"colors_m = np.array(['r','g','b','k'])\n",
"plt.scatter(x = x_n[:,0], y = x_n[:,1], s = 10, c = colors_m[assignements_n], marker = 'o', faceted=False, alpha=.7)\n",
"plt.scatter(x = mu_m[:,0], y = mu_m[:,1], s = 50, lw = 3, c = colors_m, marker = 'x', edgecolors=None);\n",
"plt.xlim([0, max(plt.xlim()[1], plt.ylim()[1])]);plt.ylim([0, max(plt.xlim()[1], plt.ylim()[1])]);"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Generated n=100 points from a mixture of m=3 gaussians prob_m = [ 0.27417081 0.41692802 0.30890117]\n"
]
},
{
"output_type": "display_data",
"png": "iVBORw0KGgoAAAANSUhEUgAAAWwAAAD5CAYAAAADQw/9AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XlcVOX+B/DPsMmu4AIq4JbIIgIukXb14gIWqZmZV6hE\nyK5WtqmV3eoX3q5hmXW11Exzy0oru+o1xSIb02uYiiLhmoqBgInKJsvAzPn98SiL7MMMhzPzeb9e\nvBjOnDnnexS+88xznuf7qCRJkkBERG2ehdwBEBFR0zBhExEpBBM2EZFCMGETESkEEzYRkUIwYRMR\nKYSVsQ6sUqmMdWgiIpNW32hro7awJUkyma8333xT9hh4LaZ/PaZ0LaZ2Pa11LQ1hlwgRkUIwYRMR\nKQQTdhOFhobKHYLBmNK1AKZ1PaZ0LYBpXU9buBaV1Finib4HVqka7Y8hIqKaGsqdbGETESkEEzYR\nkUIwYRMRKQQTNhGRQjBhExEpBBM2EZFCMGETESlEgwk7NjYWbm5uCAgIqNz20ksvwdfXF4GBgZg0\naRLy8/ONHiQRETWSsGNiYpCQkFBjW3h4ONLS0pCSkgJvb2/Ex8cbNUAiIhIaTNjDhw+Hi4tLjW1h\nYWGwsBAvCwkJQWZmpvGiIyKiSi3qw167di0iIiIMFQsRETVA7wUMFi5cCBsbG0RFRdW7T1xcXOXj\n0NDQNlE8hYioLVGr1VCr1U3at9HiT+np6Rg/fjxSU1Mrt61fvx6rV6/Gjz/+CFtb27oPzOJPRETN\nZtDiTwkJCVi8eDG2b99eb7ImovqdvHoSX6Z+iZyiHLlDIYVpsIUdGRmJffv2ITc3F25ubliwYAHi\n4+Oh0Wjg6uoKABg6dChWrFhR+8BsYRPVkleah9jtsSjXlcPT2RMrHqj9t0PmraHc2WAf9pdffllr\nW2xsrGGiIjIBn6V8hh1nd2C413A8F/Jco/tX6Cqgk3QAgDJtmbHDIxPDBQyI9CRJEh7a8hC0khYA\nsHHiRrjYuTTyKuCXjF9wLOcY7rvrPvR26W3sMElh9G5hE1H9VCoVhnQbgqTLSfDp5IP2tu2b9Lqh\nnkMx1HOokaMjU8QWNlEL6CQdrhRdQWeHzrCyYPuHWq6h3MmETUTUhnBNRyIiE8DPcKRIucW5WHZo\nGawsrPB8yPNN7j8mUjK2sEmRvj31LY7lHMPhrMPYdW6X3OEQtQombFKkXh16VT7u2aGnfIEQtSLe\ndCTFOnX1FKwsrNC3Y1+5QyEyGI4SISJSCI4SISIyAUzYREQKwYRNRHUqKS/BV2lf4ccLP8odCt3C\ncdhEVKdPj32KPef3AAAcbRwR4hEic0TEFjbRHYo0RcguzJY7DNlV6CoqH5frymWMhG5jC5uompyi\nHMzZMweFmkLEBsXiId+H5A5JNk8EP4H27drD1c4V93reK3c4BLawiWo4k3sGhZpCAMDR7KMyRyMv\np3ZOiAmOwYM+D0KlUskdDoEJm6iGwd0Gw6+zH1xsXTDRZyIAQKPVyBwVkcCJM0T1kCQJC/cvxKHL\nh3Bfn/vwzN3PyB0SmQFOnCHSw43SGzh0+RAA4PsL3zerAVJYVohLeZeMFRqZKSZsonp0sO2AQV0H\nAQBG9hzZ5H7c3OJczPpuFmbvno2NKRuNGWKlM7lnsOf3PSgpL2mV85E8OEqEqB4WKgvEhcbhpuYm\nHGwcmvy6izcuoqCsAACQciXFWOFVyizIxCuJr0AraXE0+yj+MfwfRj8nyYMtbKJGNCdZA0CgeyAG\ndx2MTvadMMVvipGiqpJfml+5cvv1kutGPx/JhzcdiUzAV2lf4fz184gMiGR9cIVjeVUiI9HqtCgu\nL4ZTOye5QyETwVEiREZwU3MTU76egn4f9cPYTWORV5pX4/lVq1Zh48aaNx3ff/99bN26tTXDJBPS\nYMKOjY2Fm5sbAgICKrddv34dYWFh8Pb2Rnh4OPLy8ho4ApHpunDjAlL/TIVW0uJM7hkczaqaGblq\n1SrMmjUL06dPr0zaS5Yswdy5c/G3v/2NSZv00mDCjomJQUJCQo1tixYtQlhYGM6ePYvRo0dj0aJF\nRg2QqLVlFWY1aQx13459EeAWABVU6NmhJ/p36Q8A0Gg0+PjjjwGIyTfTp09HYGAg5s2bBwDQarVY\nuXIluwyp2Rrtw05PT8f48eORmpoKAPDx8cG+ffvg5uaGnJwchIaG4vTp07UPzD5sUqDjOccRp46D\nVtJizj1zMLLXyEZfc634GhxsHHBTcxMlFSXwcPZAbm4uxowZg5SU2sP6RowYge+++w6Ojo7GuARS\nuIZyZ7PHYV+5cgVubm4AADc3N1y5cqXefePi4iofh4aGIjQ0tLmnI2pVp3NPVw6RO5V7qkkJu6N9\nR5y7dg7zf5wPjVaD2UNmY+xdY5GYmIjOnTvX2p/JmqpTq9VQq9VN2rdFE2dUKlWDs7+qJ2wiJQjv\nE46kzCScu3YOfVz61HgurzQPNpY2sLe2r/W607mnK4tEnbhyAmPvGosNGzbUeY5vv/0W06ZNM3zw\npEh3NmYXLFhQ777NHiVyuysEALKzs9GlS5fmR0jURrnaucK5nTMsLCyw4sgK/JH/BwBg/6X9mL5t\nOmK2xyA9L73W64b3GA7fTr7o5tQND/o8iCVLllT2Wd9p+vTp9SZzooY0O2FPmDCh8pdtw4YNmDhx\nosGDIpJTSYWox6GTdCirKAMAHLp8CFpJjLlOyandL93BtgPeDXsXq8atQk+nnvj6668rnxsxYgTS\n09Ph3MMZgLgRuenLTU2+x5NVmIX5ifOxQL0AhWWFLb08UrAGE3ZkZCSGDRuGM2fOwNPTE+vWrcP8\n+fPxww8/wNvbG3v37sX8+fNbK1aiVvHiPS9ibJ+xePbuZ9G3Y18AwNg+Y+Fk44RuTt0wzHNYg6+3\nsbHBnj17EBISUnmDsUePHpi7Yi6cezij1+Be2P6f7fV2J2q0GmQXZlcm9K0ntyLtahqOZB/B9+e/\nN+zFkqJwpiORAZRWlCJOHYeLeRcxa9AsjOw1Evn5+bC0tKy8wShJEi5mXYS7qzvs7Wr3g98+zpw9\nc5BRkIHw3uF4NuRZ7Dq3CyuPrBTFqP4ah+Cuwa15adTKDDpKhIhq++3P35B2NQ0AsP3MdozsNRLt\n27evsY9KpULv7r0bPE5OUQ4yCjIAAIezDgMAIvpGoLdLb9ha2bJOiJnj1HQjOHECePllYO1agB8y\nzMNdrnehk30nAMA9HvfofRyv9l4Y5jEM9tb2eNj34crtPp18mKyJXSLGMHs2cOnWRLl33wV8feWN\nh1pHaUUpCsoK0MWBI6dIfyz+1Mq8vMR3e3ugjnkTZKJsrWwNk6zT04GkJECrbfmxyKSwhW0E5eVA\ncrJI3F27yh0NKcqlS8ALLwAVFcD99wNPPy13RNTKeNOxlVlbAyEhckdBbU12YTY+OfoJXO1cMXPw\nTNhY2tTeKSdHJGsAyMxs3QCpzWPCJmqKrCwgMREIDBRfevjsxGc4kn0EANCvUz+E9wmvvdOQIcAD\nDwAZGUBsbEsiJhPEhE3UFP/6l0ii27YBa9YArq7NPkQ3p24AAPviCniU2da9k4UFMGtWSyIlE8aE\nTYpx4cYFrDq6Cp3tO+O5kOfq7lIwlts3AHU68dWA0opSfLt3Oeyu5uHBia/AwkFMnIkKiEL/Alv0\nWbwGTjuWAHMk4K9/NXbkZEKYsEkxPj/xOU5ePQkAGNR1UJNKnxrMa68BCQlAcDDQqVODu27evxJb\nE94DJB1sz6fj/v/7DABgobJA0FVLQGcNQAccPVpnwj559SRO557GqF6j0MG2gzGuhhSKw/rakIoK\n4M03gcmTgZ075Y6m7enl0gsAYKmyhIdT99Y9uZcX8Pe/iz7mRqhKSwFJtMJVRTdrPjl8ONCjB9Cx\nIxARUeu1V4qu4LW9r2Hd8XWIPxBvkNDJdLCF3YacPy+GAwKiq3TcOHnjaVM0GjzqH4n+HbzhumId\nvL6aDzz5pBj61sZEhs+FXXYu7LJzER75fzWf7NQJ+Oijel9bpi1DhU6MErmpuVnvfmSemLDbEE9P\noHt34PJl4B79ZzebjsJC4JNPxDtZRgZUbm4Iio4G0m4Nd9u5s+kJ+8wZcSwPD+DZZwEr4/3q21ja\nYErsEr1e69XeC8/d/RzSrqZhog9LF1NNTNhtiL09sGwZkJcHcF0IAFu3Amo1cPo04OQkthUVAd26\niWF2997b9GNt3AicPSu+QkKAYQ2XSJXL7QkTPdr3qBxV0ixffw3s3QuEhQGTJhk4OpIbE3YbY2PD\nZF3p9j9Ep05iuJubm0i2YWGi9e3i0vRj9e4tqnLZ2IhWdhv1U/pPWPbrMgBAQVkBooOim/7ikhLx\nxgQA69eLPjWbVhxJQ0ZnFgk7JUX0CQ8cCIwfL3c01GQREeLmnKUlMGCA6MawuHWfvDnJGgBiYoBB\ng8SbQLemtVw1Wg2yCrPg4ewBK4uqP5UiTRF2nt2Jro5d8deehh2Wd3tdSED0ZzdLu3ZAz56iFknv\n3kzWJsgsaolMnw5cuyYef/IJ63tQ43SSDnP2zMH5G+cR5B6Et0a+Vfncewffw75L+wAA/xr5LwS6\n6zfzsS5anRZbT23FTc1NTPGfAgcbh+YdoKRE9Pn36QPY2RksLmo9Zl9LpHNnkbDt7QGHZv7+k0Kk\npwPHjom+aTe3Fh+usKwQ52+cByBWQddJOlioROv+9iiOOx8bgqWFJab4T9H/AHZ2QP/+hguI2hSz\naGEXFgKHDom61N1befgutYLiYlF34+ZN8fHpk08MctjVR1fj5z9+xgN9H8DU/lMrt+eV5mHrya3o\n6tQVEX2rxlLrJB2WJi3FkewjmOo/FeP7sf+Nmq+h3GkWCZtMXEEBMG2amD7u6Ah8+aUsYWTkZ+Dp\nXaIcqpONE754+AtZ4iBlM/suETJxzs7AvHmi6P/YsbKF0cWhCzycPZBZkIlgdy6US4bHFjaRAZVW\nlCKnKAde7b0q+7yJmoNdIkRECsE1Hck4mlBq1KCuXgVycxvfr7S0deOqw9WbV/HzpZ9RWFYoaxxk\nWpiwST+nTgGRkcDjj4shdcZ2+LAo9jRjhhi+V589e4C//Q2YOVPM8W+IRgNkZwMG/iRYUl6Cud/P\nxeKDi/HGT28Y9Nhk3vRO2PHx8fD390dAQACioqJQVtbMWVmkbPv2ieF0BQXA//5n/POlpIhRIFqt\nmGJen717Res6JwdIS6t/v7IyYM4cUTJ12TKDhlpSUYIbpTcAAFmFWY3un1+aj+W/LsemE5ug1YmF\nEr4//z3e+OkNHPjjgEFjI2XTK2Gnp6dj9erVSE5ORmpqKrRaLTZv3mzo2BQpNRX4/HPRcDNpw4aJ\nqdD29sDddxv/fGPHihogPXqIWiL1GTNGTGXv1q3hCSRXrogVygHg119rP9+CLhVXO1f8feDfEdAl\nAC/c80Kj+29M2YiE8wnYkrYFey/uRZGmCMsPL8fxnON4/5f3oZPk7d6htkOvYX3Ozs6wtrZGcXEx\nLC0tUVxcjO6ckYLr18UCBOXlwMGDwPLlckdkRAMGAJs2idoeddSsyMkBPvhArCA/bx7QoaULp3h6\nAitXNr5fWJhYxcXaGlCp6t/Pw0NU+zt2DOUTJ8L69vbCQuDVV1GekQHr554DRo/WK9zx/cY3eeKM\no41jjcftLNvBxdYF10quoYtDF442oUp6JWxXV1fMnTsXXl5esLOzw9ixYzFmzJha+8XFxVU+Dg0N\nRWhoqL5xKkL1e3Dl5fLG0ips61lIFqLY1kmxmhcSEoCpVRMFodVpYWlhaby47nwD0WjEx56KCuCx\nx8T0bQsLYP58HDhwANOmTcO2fv0wYMAAIDUVPxw5gqdSU7Grc2d465mw76STdPjp4k8AgJG9RtZI\nwo8HPg53R3c4t3PGULu+wIGDWBLyfzhRegnBXTme29Sp1Wqo1eqm7Szp4ffff5d8fX2l3Nxcqby8\nXJo4caK0adOmGvvoeWjF++UXSVq5UpIuXpQ7EnklJkrSuHGSNH68JP36a9X25b8ul8Z9MU6K3x8v\n6XQ645y8vFyS0tMlSaMRP3/zjQhm3DhJ2rChcrf9+/dLjo6OEgCpU6dOUkpKivT9t99KtpaWEgCp\nq6urdObMGYOE9N8z/5XGfTFOGvfFOGn76e1171RWJknR0SLOp54yyHlJeRrKnXq1sI8cOYJhw4ah\nY8eOAIBJkybh4MGDePTRR/U5nEm55x6uFgOIngQvL9Ez0bOn2KaTdEj4PQEA8L+M/yG/LN84i8zG\nxYmblP36Ae++W7PiV7XH1tbWsLhVrjU3NxeBgTWr7lk5OMDKQCvTFGmK6nxcQ1mZ6FcDRJ+STldV\nTpYIenaJ+Pj44K233kJJSQlsbW2RmJiIu1vjxpOJqKgQf5umXjmwb9+aP1uoLDCixwjsu7QPwe7B\ncG7nbPiTVlSIZA2IZcFu3hQ3LG1sRD9VtRuWISEh+P777xEeHo6CgoIah/H09IRarUbv3r0NEtZE\nn4m4qbkJnaTDQz4P1b2TkxPw9NPA/v3AffcxWVMtes90fPfdd7FhwwZYWFhg4MCBWLNmDaytK2/d\nmMVMR50O+OMPUc2zqaWHb9wA5s4V8z+eegrIzxd9vZGRopqgOSgsK4SjjSNU9d0UlKSGbxg2ZtMm\nYPduYORIMW67EQsXLsTrr79eY9u2LVvwYO/e4j/3o49E4p83r+rjApGRcGq6kSxeDPz8syjZunSp\nGOXWmAMHgHfeEY+7dq0a/tejR4OLaZuHGzeAf/xDzGicOxcYOtTop/zhhx8wYcIElJaW1tjeyc4O\nPw4ZggE2NqJ1bmEh+nleaHyYHlFLcGq6kdyecHf5MvDnn017zYABYkSZpaVY8Pt2kjdAzX3lS04G\nMjNFf9GePUY/XWJiYp3JGgByS0owOikJv+XnV7X2/f1bftIDB4DvvjOTYURkaCyv2gJRUcCWLWKp\nwKau6+rsLIYTa7UiaQ8aBFy4wBuVAMREFxcX0U/UnBXRG/Lzz6KLxN8fePbZGv3CnTt3hoODA0pL\nSyv7rK9evVrZp93B3h4dnnwSmDBBvIl4eTX//DqdmKTTubOYoHP749Wff4p1JomagV0i1LZoNCI5\nOjkZ5ngzZoiECQBLlgDe3jWeTklJQUxMDL755pvKG4yHDh3CM888g23btsGjpSusL1gAHDkC+PkB\no0ZV9XuFh4s3EKI7sA+bzNf77wM//QS4uopk6eQkpqQXF1fe5ZUkqdYN0Lq2NSg1Ffj2WyAoCHjw\nQbGtogJ4qNqIkM8+A3btEp8goqKA9u3rPhaH85k1Jmwz9sUXYpTYAw8A48bJHY0MdDrg7FlxZ9jJ\nSSTWN94QfVKTJ4vWt5ubqDrYkiT5xBNVNzJWrBBT6QFg3TqRpEeOFEP2GnPggHiTcXMD4uMNMKef\nlIZLhCnc9evAf/4j+smbswJWfn7V8oZr1gAREWbYcLOwAHx8qn6+cEEka0DcgLC8NUW+Tx/gL3/R\n/zxduoiEbWdXszsnJqZ5fdUJCeKGZGYmcPw4YOLlHKh5mLAVYMUKseo7ALi7A3dMyKuXg4NI8pmZ\nYhKL2SXruowZI8qzFhaK1usvv4hRIK6uLTvu66+LY3l7N7lVrJN0OHX1FLo5dYOLnYvYOGKE+BTg\n4tJwtUEyS+wSUYBFi6pKTsfHN+/v+OZN0aj09m7aOHGzUl4u6nq7uQEBAa1++mWHluGHCz/AuZ0z\nPrz/Q7ja3XrTKCoS/1nVJqKR+WCXiMI984wYUebh0fxGl4ODLLlIGaytRYvbUDQasTJOr16iHncj\nTueeBgAUlBUgqzCrKmE7OjbwKjJnbGFXU1oKfPON+Dt++GHAQHV/kJ8PqNWilWsu08/N0sKFQFKS\n6MdesQLo1KnB3Q9lHsL6lPXw6eiDZ0OeZd1rAsAWdpN99RXw9dfisZ2dmC9hCIsWAb/9Jt4IVq40\n/qxGrRbYuBG4dg2IjhZzNkxGSYkYpufl1fb6eDMzxfeSEvGP30jCDvEIQYhHSCsERqaCCbua6i1q\nQ3YfFt5aOLu8XPwtG9vPP4shwYAY1fbyy8Y/Z6v58EMxTtHCQixpY6Bqegbx1FPA5s1iVEq/fnJH\nQyaICbuaKVNEy7pdu+YNn2vMnDliWJ6/f+sUe6s+4MHFxfjna1V5eWLSy7lzokDUsmVVY57lNmCA\n+CIyEvZhm6jjx0UJ19BQw/XFNyY5WbzZGaJGUr0uXxYV8y5cEDf2Jk8W/T5EJoLV+upw4gQwaxbw\n1luidIUSHT0qSjSvXy+6Pk6cEJVJATFDesyY1kvWO3eKBYjnzxcLEBtN9+7Aq69WLWczcKART0bU\ntphtl8jmzaKxdvmyGInVkkluclm1StTTPnMGSE8XCdzeXnTzdunSurHcvt8GiH9Toxo4UEz5Vqnq\nr8dBZILMNmH7+4sJZXZ2YtisEvXoIRK2o2NVQbriYiArq/UT9iOPiNa9ra2YAm90rLFBZsis+7DP\nnQM6dmz5rGS5aDSir7pnT5Gk164VJTGql30+d06Ug+7dG5g2reGVty5fFiPROCOSSD6s1mfG5s4V\nxeoAMa+jvkEMH38sFkLp2lWMljP1BYKbJCVFFIdqa+O9yaTxpqPCSZJUa1VvnU6HwtsDvBvg7i6+\nW1uLTxP1OXxYfM/Ortkfbbb27BEFnV59VQxsJ2oDmLDbOEmSMGfOHNx77724emsIiE6nw8yZMzFq\n1Cjk5eU1+PrnnwdeekmUWO7evf79HnlE9IWHhIhuFUCsWRkVBTz3nBj+bFaqv2vxHYzaCHaJtHHz\n5s3DkiVLAAC+vv2xZEkivv76daxbtwYAMHjwYPz4449wdnY2+LnfekssQwgAs2cbdjJRm3fjhqgH\nYmkpqm8ZaskyokawloiCBQcHw8LCAjqdDqdO/YaICPcaz/v7+8Phjg7nigpRB9/OTiwj2JyVrqoL\nCRFdJQ4OZtiN6+ICvPaa3FEQ1cCE3cY9+uijAIDHHnus1nPR0dH49NNPYXl71ZRbNm8Wi6kAIlmP\nGqXfucPDxarutra8CUnUFrAPu4UyMsQiAcYUGRkJa2v7WtvfeeedWskaqFlgqqXFpjp2ZLImaiv0\nTth5eXmYPHkyfH194efnh6SkJEPGpQiffSbWVZ01S6y7aAy3bzCWlxfXem7MmDGVNyKri4oSE4N6\n9ACGDjVOXETU+vRO2M8//zwiIiJw6tQpnDhxAr5mWJk/OVl8z8sTtYiMYfbs2VizZk21LVW9WL/9\n9htGjRqF/Pz8Gq/54w8gLQ24dEmMryYi06BXws7Pz8f+/fsRGxsLALCyskJ7M6zp8Mgj4t5UcLDx\nqmqGh4fD6lYFpx49ovHAA6WIitoEi1tTGUeMGFFrhEhhoSgGBYga3GZPq61aKZ1IwfQa1nf8+HHM\nnDkTfn5+SElJwaBBg7B06VLY21f1s3JYn+Fs27YN3333Hd5442NkZVli8GBgy5bPcejQISxduhSq\nW8NAvvoKWLNG1BVxcQHuuw948smGJ8yYvN9/B954Q7yDLVggFhcgasMMPjX9yJEjGDp0KA4ePIgh\nQ4bghRdegLOzM/75z3/WOOmbb75Z+XNoaChCQ0ObHz012aRJonJfbq4orzpzpthm1jZsEAt1AmLN\ntyeflDceojuo1Wqo1erKnxcsWGDYhJ2Tk4OhQ4fi4sWLAIADBw5g0aJF2LlzZ9WB2cJudfHxwO7d\nYnr5sGHi5yYs3m3azp4VLWxJAuLiAD8/uSMiapDBJ864u7vD09MTZ8+ehbe3NxITE+Fv1GVG2oa0\nNDGEb8gQ/SejGNMrrwCPP15Vca8txtjqvL1FuUJJAmxs5I6GqEX0npqekpKCGTNmQKPRoE+fPli3\nbl2NG4+m1sI+fBi43eMTHS1WpiIiMjSjTE0PDAzE4dsl3sxAdnbdj43h+HGxtJfZTQcnogZxanoT\nhYWJsdZFRcDUqcY7z+7douYQALz8MjB8uPHO1eZduSLqwip1hQkiA2PCbiI7O7FYt7FVXw/R6Gsj\ntmX79wPvvSeq5S1cCJjhxCyiO5lVwj5/XqzZ2qmT3JHUb9KkqoblAw/IHY2Mjh8XY6d1OrH4JhM2\nkfnUw/7qK1H7o107Uczfy0vuiKhBFy6IcYl2dmJYXufOckdE1CpYDxvAyZPie1mZmPzGhG0YP/wg\nSrkOHCgKYRlM797A6tUGPCCR8plNedUpUwAPD1HfWc4Kdr/9JpbtWrJELDSgdOvXiy6c3btFqVki\nMh6zaWH7+QErV8odheiWuXBBfP3lL2JVFyXz9wd++QVwc2vb9waITIHZJOy2om9f0T1jawt4esod\nTcu98oroYvLwEN3NRGQ8ZnPTsa2QJNEt0qWLaJUSEVVn8Gp9LT0pERHVraHcaTY3HYmIlI4Jm4hI\nIZiwiYgUggmbiEghmLCJiBSCCZuISCGYsE3I2bPAiRNyR0FExsKZjiai+hJmM2cC48bJGw8RGR5b\n2Cbijz/qfkxEpoMtbBMxdixw7hxQXMwFgolMFaemExG1IZyaTkRkApiwiYgUggmbiEghmLCJiBSC\nCZuISCFalLC1Wi2Cg4Mxfvx4Q8VDRET1aFHCXrp0Kfz8/KBSqQwVDxER1UPvhJ2ZmYldu3ZhxowZ\nHG9NRNQK9J7p+OKLL2Lx4sUoKCiod5+4uLjKx6GhoQgNDdX3dEREJkmtVkOtVjdpX71mOu7cuRO7\nd+/G8uXLoVarsWTJEvz3v/+teWDOdCQiajaDz3Q8ePAgduzYgV69eiEyMhJ79+7FtGnTWhQkERE1\nrMW1RPbt24f33nuPLWwiIgMwei0RjhIhIjI+VusjImpDWK2PiMgEMGETESkEEzYRkUIwYRMRKQQT\nNhGRQjBhExEpBBM2EZFCMGETESkEEzYRkUIwYRMRKQQTNhGRQjBhExEpBBM2EZFCMGETESkEEzYR\nkUIwYRMRKQQTNhGRQjBhExEpBBM2EZFCMGETESkEEzYRkUIwYRMRKQQTNhGRQjBhExEpBBM2EZFC\n6JWwMzIyMHLkSPj7+6N///5YtmyZoeMiIqI7qCRJkpr7opycHOTk5CAoKAhFRUUYNGgQtm3bBl9f\n36oDq1QEMqkpAAAG1klEQVTQ49BERGatodypVwvb3d0dQUFBAABHR0f4+voiKytL/wiJiKhRVi09\nQHp6Oo4dO4aQkJBaz8XFxVU+Dg0NRWhoaEtPR0RkUtRqNdRqdZP21atL5LaioiKEhobi9ddfx8SJ\nE2semF0iRETNZvAuEQAoLy/Hww8/jMcee6xWsiYiIsPTq4UtSRKio6PRsWNHfPDBB3UfmC1sIqJm\nayh36pWwDxw4gBEjRmDAgAFQqVQAgPj4eNx3331NOikREdXN4Am7pSclIqK6GaUPm4iIWhcTNhGR\nQjBhExEpBBM2EZFCMGETESkEEzYRkUIwYRMRKQQTNhGRQjBhExEpBBM2EZFCMGETESkEEzYRkUIw\nYRMRKQQTNhGRQjBhExEpBBM2EZFCMGETESkEEzYRkUIwYRMRKQQTNhGRQjBhExEpBBM2EZFCMGET\nESkEEzYRkUIwYRMRKYTeCTshIQE+Pj7o27cv3nnnHUPG1Cap1Wq5QzAYU7oWwLSux5SuBTCt62kL\n16JXwtZqtZg9ezYSEhJw8uRJfPnllzh16pShY2tT2sJ/lqGY0rUApnU9pnQtgGldT1u4Fr0S9q+/\n/oq77roLPXv2hLW1NaZOnYrt27cbOjYiIqpGr4R9+fJleHp6Vv7s4eGBy5cvGywoIiKqTSVJktTc\nF23duhUJCQlYvXo1AGDTpk04dOgQPvzww6oDq1SGi5KIyIzUl5at9DlY9+7dkZGRUflzRkYGPDw8\nmnRCIiLSj15dIoMHD8a5c+eQnp4OjUaDLVu2YMKECYaOjYiIqtGrhW1lZYWPPvoIY8eOhVarxRNP\nPAFfX19Dx0ZERNXoPQ77/vvvx5kzZ/D777/j1VdfrfGcqYzRzsjIwMiRI+Hv74/+/ftj2bJlcofU\nYlqtFsHBwRg/frzcobRYXl4eJk+eDF9fX/j5+SEpKUnukFokPj4e/v7+CAgIQFRUFMrKyuQOqcli\nY2Ph5uaGgICAym3Xr19HWFgYvL29ER4ejry8PBkjbJ66ruell16Cr68vAgMDMWnSJOTn57d6XAaf\n6WhKY7Stra3xwQcfIC0tDUlJSVi+fLlir+W2pUuXws/PzyRuCj///POIiIjAqVOncOLECUV/yktP\nT8fq1auRnJyM1NRUaLVabN68We6wmiwmJgYJCQk1ti1atAhhYWE4e/YsRo8ejUWLFskUXfPVdT3h\n4eFIS0tDSkoKvL29ER8f3+pxGTxhm9IYbXd3dwQFBQEAHB0d4evri6ysLJmj0l9mZiZ27dqFGTNm\nKP6mcH5+Pvbv34/Y2FgAopuuffv2MkelP2dnZ1hbW6O4uBgVFRUoLi5G9+7d5Q6ryYYPHw4XF5ca\n23bs2IHo6GgAQHR0NLZt2yZHaHqp63rCwsJgYSFSZkhICDIzM1s9LoMnbFMdo52eno5jx44hJCRE\n7lD09uKLL2Lx4sWVv3RKdvHiRXTu3BkxMTEYOHAgnnzySRQXF8sdlt5cXV0xd+5ceHl5oVu3bujQ\noQPGjBkjd1gtcuXKFbi5uQEA3NzccOXKFZkjMpy1a9ciIiKi1c9r8L9cU/iofaeioiJMnjwZS5cu\nhaOjo9zh6GXnzp3o0qULgoODFd+6BoCKigokJyfj6aefRnJyMhwcHBT1kftO58+fx7///W+kp6cj\nKysLRUVF+Pzzz+UOy2BUKpXJ5IaFCxfCxsYGUVFRrX5ugyfspozRVpLy8nI8/PDDeOyxxzBx4kS5\nw9HbwYMHsWPHDvTq1QuRkZHYu3cvpk2bJndYevPw8ICHhweGDBkCAJg8eTKSk5Nljkp/R44cwbBh\nw9CxY0dYWVlh0qRJOHjwoNxhtYibmxtycnIAANnZ2ejSpYvMEbXc+vXrsWvXLtneTA2esE1pjLYk\nSXjiiSfg5+eHF154Qe5wWuTtt99GRkYGLl68iM2bN2PUqFHYuHGj3GHpzd3dHZ6enjh79iwAIDEx\nEf7+/jJHpT8fHx8kJSWhpKQEkiQhMTERfn5+cofVIhMmTMCGDRsAABs2bFB0gwcQo98WL16M7du3\nw9bWVp4gJCPYtWuX5O3tLfXp00d6++23jXGKVrF//35JpVJJgYGBUlBQkBQUFCTt3r1b7rBaTK1W\nS+PHj5c7jBY7fvy4NHjwYGnAgAHSQw89JOXl5ckdUou88847kp+fn9S/f39p2rRpkkajkTukJps6\ndarUtWtXydraWvLw8JDWrl0rXbt2TRo9erTUt29fKSwsTLpx44bcYTbZndfz6aefSnfddZfk5eVV\nmQueeuqpVo9Lr1oiRETU+pQ/XICIyEwwYRMRKQQTNhGRQjBhExEpBBM2EZFCMGETESnE/wNuoZ7f\nkYrBoAAAAABJRU5ErkJggg==\n"
}
],
"prompt_number": 21
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"The K-Means algorithm"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Initialisation (step $s = 0$):\n",
"\n",
"- Choose $m=3$ centroids randomly drawn fron the $n$ points\n",
"\n",
"As long as $a_n$ at step $s$ differ from previous $a_n$, do :\n",
"\n",
"- Estimate the assignements $a_n$ by associating each points to its closest centroids according to the $L_2$ norm;\n",
"- Re-compute the $k = 1, \\ldots, m$ centroids by averaging coordinates $ \\{ x_i ; a_i = k \\} $;\n"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"step = 0\n",
"centroids_mu_m = x_n[np.random.choice(n, size = m, replace=False),:]\n",
"centroids_a_old_n = centroids_a_n = np.repeat(m, n)\n",
"dist_n = np.repeat(np.infty, n)\n",
"\n",
"def kmeans_iteration(x_n, centroids_mu_m):\n",
" dist_n_m = np.transpose([np.sum(np.abs(x_n - centroids_mu_m[k,:])**2, axis=1) for k in range(m)])\n",
" centroids_new_a_n = np.argmin(dist_n_m, axis=1)\n",
" centroids_mu_m = np.array([np.mean(x_n[centroids_new_a_n==k,:], axis=0) for k in range(m)])\n",
" return centroids_mu_m, centroids_new_a_n, np.min(dist_n_m, axis=1)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 23
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"plt.scatter(x = x_n[:,0], y = x_n[:,1], s = 10, c = colors_m[centroids_a_n], marker = 'o', faceted=False, alpha=.6)\n",
"plt.scatter(x = centroids_mu_m[:,0], y = centroids_mu_m[:,1], s = 50, lw = 3, c = colors_m, marker = 'x', faceted=False)\n",
"plt.xlim([0, max(plt.xlim()[1], plt.ylim()[1])]);plt.ylim([0, max(plt.xlim()[1], plt.ylim()[1])]);\n",
"plt.title('Step = {}'.format(step))\n",
"print 'At step {}, {} assignements changed and total variance equals to {:.2f}'.format(step,\n",
" np.sum(np.where(centroids_a_n != centroids_a_old_n)[0]),\n",
" np.mean(np.sqrt(dist_n)))\n",
"\n",
"# Performing step\n",
"centroids_mu_m, centroids_new_a_n, dist_n = kmeans_iteration(x_n, centroids_mu_m)\n",
"centroids_a_n, centroids_a_old_n = centroids_new_a_n, centroids_a_n\n",
"step += 1"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"At step 5, 0 assignements changed and total variance equals to 1.08\n"
]
},
{
"output_type": "display_data",
"png": "iVBORw0KGgoAAAANSUhEUgAAAWwAAAEHCAYAAACKrHwgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XtcVHXeB/DPwIAgIALCIIJKIgqIYGpeUhszTG1x1WxT\n10va5dG2nuyp9rGtVu2GZW1qWttamm2m9WytmSGVa8NipmUooqKISnKRm9wZLgOc54/fclMuwzDD\nmTN83q8XL8fhzDnf424ffvzO76KSJEkCERFZPTu5CyAiIuMwsImIFIKBTUSkEAxsIiKFYGATESkE\nA5uISCEY2GR1jhw5gokTJ6Jv377w8vLCpEmTcOLECQDAhx9+iMmTJ8tc4c20Wi2cnZ3h5uYGNzc3\nhISEyF0S2SAGNlmV0tJS/OY3v8ETTzyBoqIiZGVlYe3atejVq5fcpbVLpVJh27ZtKCsrQ1lZGVJS\nUuQuiWwQA5usSmpqKlQqFe6//36oVCo4OTkhKioK4eHhSElJwapVq/Djjz/Czc0Nnp6eAIDq6mo8\n/fTTGDRoEHx9fbFq1SpUVVUBAHQ6Hfz9/RETEwNvb28EBgbik08+sUjtnINGlsbAJqsybNgw2Nvb\n44EHHkBcXByKiooavxcSEoK//vWvmDBhAsrKylBYWAgAWLNmDdLS0pCUlIS0tDRkZWXhxRdfbPxc\nbm4url+/juzsbOzatQuPPPIIUlNTW73+o48+Cg8Pj1a/IiMj26392Wefhbe3NyZNmoT4+Hgz/GsQ\n3UAisjIpKSnSAw88IPn7+0tqtVqaPXu2lJubK0mSJO3cuVOaNGlS47H19fWSi4uLdOnSpcb3jh49\nKgUGBkqSJEnff/+9pFarJb1e3/j93/3ud9JLL71k1pqPHz8ulZeXSzU1NdKuXbskNze3FjURmQNb\n2GR1hg8fjp07dyIjIwNnzpxBdnY2Vq9e3eqx+fn50Ov1GD16dGNLeObMmSgoKGg8xsPDA87Ozo1/\nHzRoELKzs81a82233QYXFxc4ODhg6dKluP322xEbG2vWaxAxsMmqDRs2DMuWLcOZM2cAiId7zfXr\n1w/Ozs44d+4cioqKUFRUhOLiYpSWljYeU1RUBL1e3/j3X3/9FQMGDGj1eitXrmwc6XHjV3h4uAXu\nkMh4DGyyKhcuXMBf/vIXZGVlAQAyMjKwZ88eTJgwAQCg0WiQmZkJg8EAALCzs8PDDz+M1atXIz8/\nHwCQlZWFb7/9tsV5165dC4PBgISEBHz99de47777Wr3+X//618aRHjd+JScnt/qZkpISfPPNN6iq\nqkJtbS12796NhIQEzJgxwyz/JkQNGNhkVdzc3HD8+HGMGzcOrq6umDBhAkaOHIk333wTADBt2jSE\nhYXB19cXPj4+AIDXXnsNQUFBGD9+PNzd3REVFdXioaKvry88PDzg5+eHJUuW4L333kNwcLDZajYY\nDHjhhRfg4+MDb29vbNu2DV9++SWCgoLMdg0iAFBJEscike3S6XRYsmQJMjIy5C6FqMvYwiYiUggG\nNtm8Gx9UEikVu0SIiBSCLWwiIoVQW+rE/DWUiMg0bXV8WLSFLUmSzXytXbtW9hp4L7Z/P7Z0L7Z2\nP911L+1hlwgRkUIwsImIFIKBbSStVit3CWZjS/cC2Nb92NK9ALZ1P9ZwLxYb1qdSqTrsjyEiopba\ny062sImIFIKBTUSkEAxsIiKFYGATESkEA5uISCEY2ERECsHAJiJSiHYDe8WKFdBoNC02H33mmWcQ\nEhKCiIgIzJs3DyUlJRYvkoiIOgjs5cuXIy4ursV706dPx9mzZ5GUlITg4GDExMRYtEAiIhLaDezJ\nkyfDw8OjxXtRUVGwsxMfGzduHDIzMy1XHRERNepSH/aOHTswa9Ysc9VCRETtMHkDg1deeQWOjo5Y\ntGhRm8esW7eu8bVWq7WKxVOIiKyJTqeDTqcz6tgOF39KT09HdHQ0kpOTG9/78MMPsX37dvzrX/+C\nk5NT6yfm4k9ERJ3WXnZ2uoUdFxeHjRs3Ij4+vs2wJqK2paUB588D48cD/frJXQ0pSbst7IULFyI+\nPh4FBQXQaDRYv349YmJiUFNTA09PTwDAhAkT8M4779x8YrawiW5SWgr86U+AwQD07w806zUkAtB+\ndnI9bKIu2LcPOHwYGDMGWLq04+OLikRg19cDXl7Aq69avkZSFgY2kQVIEvDooyJ8AWDjRqBPn44/\nd/IkcO4cMGUKEBBg2RpJeczah01EgkoFjBwJnDoF3HIL4Opq3OdGjRJfRJ3FFjZRF0gSUFAAeHoC\n9vZyV0O2gF0iREQKwT0diYhsAPuwSZGKioCPPhLdEMuWAW5ucldEZHlsYZMiffutGGmRnAzEx8td\nDVH3YGCTIvn7N70eMEC+Ooi6Ex86kmJduiS6RAYPlrsSIvPhKBEiIoXgKBEiIhvAwCYiUggGNhG1\nqqoKiI0FfvxR7kqoAcdhE1Gr/vEPICFBvO7dG4iIkLceYgub6CZ6PZCfL3cV8qutbf01yYctbKJm\nCgrEGtUVFcD8+UBUlNwVyee++8QM0r59gVtvlbsaAtjCJmrh8mUR1gBw5oy8tcjNxQW4915g2jSx\nlCzJj4FN1Ex4OBAUJDYiaGhdGwzy1kTUgBNniNogScC77wJJScDkycDixXJXRD0BJ84QmaC0VIQ1\nAPzwgwhwY1VUANnZlqmLei4GNlEb+vQBRowQr8eNM74ft6gI+POfgfXrxSa93eHKFeDIEaC6unuu\nR/LgKBGiNqhUwOOPA5WVgLOz8Z/LyADKy8Xr8+ctU1tzOTliA+C6OvGgdOVKy1+T5MEWNlEHOhPW\nABASIlrmHh7AzJmWqam5sjIR1gBQXGz565F8+NCRyAYcPAhcvQrcc0/LtcJJebi8KpGF1NeLLhMX\nF7krIVvBUSJEFlBZCTz6qHgg+fvfi66JBrW1Tf3YDWpqxGeITNVuYK9YsQIajQbh4eGN7xUWFiIq\nKgrBwcGYPn06itlpRj1URobYU7K+XjxcbJgZWVsLLFkCzJjRFOI1NWKq++zZYq0SIlO0G9jLly9H\nXFxci/c2bNiAqKgopKamYtq0adiwYYNFCyTqbnl5xo2hHjRIzIxUqYAhQ4ChQ8VY7aVLgb17xdjt\nmTOB69dFWH/1FXDoEPDb34oAJ+qsDvuw09PTER0djeTkZADA8OHDER8fD41Gg5ycHGi1WpxvZewS\n+7BJiVJSgC1bRKt5xQrR3dGRkhLAyUl0d1RVAZ9+Cqxe3fbxa9aIBaa4Pge1pr3s7PQ47NzcXGg0\nGgCARqNBbm5um8euW7eu8bVWq4VWq+3s5Yi61eXLIqwBIC3NuMB2dwfS04E33hDrjixZAmza1Hpo\nM6zpRjqdDjqdzqhjO93C9vDwQFFRUeP3PT09UVhYePOJ2cImBSouBrZuFWG9YgUwZUrT98rKAAcH\n0Zq+0eHDomUNAGPHim6RXr1uPq6kRMygJGqLWUeJNHSFAMC1a9fg4+PTteqIrEjfvmIN6F69gD17\ngGvXxPsnTgB//KNoIWdm3vy5sWNFP7ZGI0J+/vzWzz9smOhyYVuGTNHpwJ49ezZ27doFANi1axfm\nzJlj9qKI5NSwHkd9fdPDwaSkpjHXrU03d3MTgb5+vfjzq6+avtd8pmROjugSycszvp68PNHdsnVr\n01rd1DO1G9gLFy7ExIkTceHCBQQEBGDnzp1Ys2YNvvvuOwQHB+Pw4cNYs2ZNd9VK1C0eeEAsp7pk\niRgJAoi/u7iIFnR7u6+oVEB0dNPf16wRIbtpU9N7o0YB3t5tn8NgEFuUNbTCv/kGuHhRDCE8csTk\n2yIbwJmORGZQXQ28/bYYm71okRiTfekS8MorTQ8YN20CCguBdesAuzaaStXVQEyM6IqZNEn80IiP\nBz75RHzm8ceB0NBuuy2SgVlHiRDRzS5eFF+AGGv93HM3H9PeUL8G+flN/eanT4s/77gDCAgAHB25\nTkhPx6npFnC+4Dxe/+F1/OPcP/hbRg8xaJBYnQ8AIiNNP4+fn+hycXIC7r676f1bbmFYE7tELGK9\nbj2yy8VUuT9O/COGeA6RuSLqDtXVYv0QLy+5KyEl4+JP3czPzQ8A4KR2gqezp8zVUHfp1cs8YZ2V\nBZw61TSBh6gBW9gWUFtfi7N5Z+Hn5gdvl3aGAxDdIDsbePllsSHBHXeIB5jUs/ChYzdT26kR4Rsh\ndxlkZfLygM8+E1PZFywQsyZvlJ/ftHvMf+anETViYBMZIS8POHoUGD5cfJniyy/FWGoACAwUw/Zu\nFB4OaLVipEhbsyWp52JgExnhnXdEiH73nRhb3bdv58/xnzXTUFvb+jojgBhrvXCh6XWSbWNgk3Jk\nZIiFpj09xepKrfUpWEhDN4UkdbwOSHU18Pe/i4WeVq0CevcW70dHi5L37gV27BDnue02y9ZNtoWB\nTcqxf79YRg8AwsKA8eO77dKrVgEJCWJH9Ibx1m3ZvRt46y0RyJmZ4jUgZjyq1WKMdX29mA3ZWmCn\npYllXseP58p+1BKH9VmR2vpabDm+BY8ffBzfX/le7nKsT8PMEXt7wNe3Wy/t5wfcfz8wcmTHx1ZX\nN7XCb9wObMwYYMAA0aXS2vLw168Df/kL8PnnwHvvdblssjFsYVuRqyVXcTb/LADg0JVDmBo4VeaK\nrIjBIPoUAgOB//s/YONGkaDNF6y2Eg89JIK3sBB48smW3/PwAP7857Y/W13d1P3CDXvpRgxsK+Ln\n5geNiwa5FbmI0HBYICoqxK4Aly8DubmAjw8wZ07T2qTff298YF++LMbU+foCixeLvgkLcXAAnn/e\ntM/6+Ynu+YsXgbvuMm9dpHwMbCvipHbCC1NeQGl1Kbx6c34zvvkGOH5crILk7i6GUFRWiuDOy2t/\nndMb7dsHXLkiviIixBqnVqihK2XAgKZRJZ0RFwf8+CNw++3A9OnmrY3kx8C2Mg72DgzrBg3zvDUa\n8cSuXz8RthMnis7hzjyRCwgALlwQzd9u7v/ujOPHgY8+Eq/Ly4G5c43/bFUV8M9/itdffAFMndqt\nA2moG/SIwD5fcB7fXfoOYT5huDPwTrnLIWPdcYd4OmdvL/bWUqubFpfu7PCJ+fOBESPEDwFjt7Uz\nGERXTP/+ooYGer3ojvH2Nvu4PIOh6XXDbjfG6tVLtMyzssTPJ4a17ekRgb3z1E4UVxXjTP4ZjPAZ\nAR8X7kOpGBFm6stXqcSYPGPV1wOvvw5cvSo+13wx6z17gJ9+Eq/79DF96mMrbr9dtKz1emDWrM59\nVqUS25Ndvdq0Uw7Zlh4R2F7OXiiuKoaz2hm9HXrLXQ5ZQmYmkJIi+rXNsWReRYVIPkB0pUhSU+u+\ntrbpuOavzcDODpg50/TPOzkBwcHmq4esS49Yra+ipgJJuUkY4jEEGlcTnuSQdauqEpsnVlaKboqX\nXzbPeT/7DPj5ZzFg+p57mt4vLRUPRH18RLdNA0kCdu0SC4bccw9wJ7vfqPPay84eEdhk48rLgWee\nEd0YLi5i5okcrl0TGzYC8tZBisblVcm2ubqK2SqnTontzeXi5SVGoOTkcKdcsgi2sInMqboaKCgQ\nM2Aa+ryJOoFdIkRECsE9Hcky6us7XmvUnAoLgaKijo+rrpZ/Q8TCQvHAsqJC3jrIprAPm0xz6RLw\n9ttiQsmTTzatpGcpp08D774ruhkee6ztPuIjR8T6pl5ewP/+L+Dm1vY5DQbxA8Db27zdF9XVQEyM\nGE0ycCDw3HPmOzf1aCa3sGNiYhAWFobw8HAsWrQI1dXV5qyLrN1PP4lhdOXlQGKi5a93/rxoNdfV\niXHRbfnxR3Fcfr5YQaktNTXAq68CL7zQNBfcXCorRVgDTQtVtaesDPj4Y7GHWMNvBkeOAJs2Ab/8\nYt7aSNFMCuz09HRs374diYmJSE5ORl1dHfbu3Wvu2hQp9Xoq9l/Yj/yKfLlLsaxRowBHR8DZ2Xyz\nEdszebIYgeHnJ6YDtmXiRDH7xMcHGDq07eMKCsQW5YBovd+oK10qffuKpV+HDQOWLev4+H37xO4I\nsbHiB45eL35LSEkBdu6Uv3uHrIZJXSJ9+vSBg4MD9Ho97O3todfrMWDAAHPXpjglVSXYfHwzautr\ncfLaSazVrpW7JMsZPhx44w0Rjq0tWlFQIMLGwQFYsaLrW6f07w+sX9/xcbffLtb3aL7uSGu1rVwp\ntnTJzQVmzBDhvXKlGDu9e7d4f8kSYMIE0+q9807jJ8707t3ytaOj+PcqLhbbodnxURMJJgW2p6cn\nnnrqKQwcOBDOzs64++67cVcri/eua5hEAECr1ULb2hYbNqReqke9JFpDhnpDB0fbgLZ2kgXEbrUN\n23klJLScKVhfb9kQuvEHiMEgtherqxMt8FmzxGzE06cBnU4E5NSpQGqqeG/SJDHxJSHB9MC+kSQB\nx46J1+PHt/xh8tvfin50V1dg8GDRxfSHP4iJOJ1Z/4QUSafTQafTGXWsScP6Ll26hOjoaCQkJMDd\n3R333Xcf5s+fj9///vdNJ+6hw/pO5ZxCSn4KJg+aDP8+Fn4QZ81+/BH48EMRTH/4AxAeLt7/5BMg\nPl6s+fHII5YZq1xX17ThgVotppF/8YX4XkCAmLre8MzF0VH82bA0nloNzJsntoa5/34R5Obw/fdi\n911AnLe11rfBIHY+KC4Wv1E0a/BQz2H2mY4nTpzAxIkT4fWfRXbmzZuHo0ePtgjsnirSNxKRvpFy\nlyG/CRNEf7NaLdb8BEQr89//Fq8TE8XDNkvsMrtli3hIGRgoRoo4Ozd9b+xY0Wc8Z44I7eZrmKrV\nYoeb2bPF91xczFdT880d2xrqV1MjtloHRLdN8wWniGBiYA8fPhwvvfQSKisr4eTkhEOHDuE2M68L\nbMvq6utQXVdt+ysH3rjGp0olAvOnn8SwvPaG3Jmqrk6ENSB2l9HrxQNLR0exsl7DQ8l33xV96829\n/rpoXQPm30LsrrtELZLU9lYwLi7AokXAiRNi6zOGNd3A5JmOr7/+Onbt2gU7OzvceuuteP/99+HQ\nrO+wJ3SJSJKErLIsePf2Ri91O/25zZRWlyLmSAyKKouwKHwRyqrLkFaYht8E/wZDPIdYuGIrUVEh\nHq61FUhdbVnu3y+6XcaNA373u5u/n53d1Gfd3KBBok978GCxAmBamhjP/fHHYqjeihWWH29OPR6n\nplvI+4nv4+fsn6Fx0eD5Kc/D0d6xw8/8kv0L/pb4NwBine7rldcBAH6ufrY9qsQYpaXAm2+KWYIr\nVlhm38W2wrpBQ2jv3i3Wwy4rEy1fOzvRzfPAA+aviagZTk23kHP55wAAuRW5uK6/btRnhvUbBl9X\nX9ir7KEdrG0M+X69+1msTsU4e1asdFdTIyaOWEJ6utjsABDdHp9/Dhw82DTiJSdHhHlWlvh78/9w\n2hvXbaxffhE/EMy88QH1DJya3gXRwdGITYtFmHcYfF2N29jV1dEV67XrUS/Vw05lhxE+I5BZmokI\nTTdMPrF2wcHiIWR5eed2RG/Pzz+LGYRDhwJLl4o+7IMHxVC6Dz5o6rPetw9YsECM5Jg+XXTZxMeL\nPvfRo8VDSD+/zl+/vl48QPTyApKSgL+J365w/Tpw773muUfqMdglQtbFYBAtbHON0HjuORGYAPDs\ns6J/GhBD5/r2bXlsa+911dtvA2fOAEFBokvl738X70+aJCbmEN2AGxiQcjg4mHe77yFDRGC7u4vJ\nKYDox66svDmcuxLWqanAt9+KGaANk8jq6kRYA+IB5sqVYrGpsjIgOrrtc1l6YhEpFlvYNu6rC1/h\nxLUT0A7SYmqgmSaBKEl9vei31mhEqz01FXjrLfH+jBkizPv1E+OyuzIy5U9/Et0cgJjw0r+/eP35\n500jVoyZp/DLL8COHaKmp5+2zNBHsmpsYStcSVUJvr30Lfq79cekgZOM/lxZdRkOXDwAAPjs3GfQ\nDtZC1dPG9trZAbfc0vT3jIymxZS++KJpHY+BA0Vftam8vERgOzm17M65997O9VUnJIgHkjk5YvEn\nzm+gZhjYCrA7eTeScpMAiNEkw/sNN+pzvR16w9fVFznlOQjsG9jzwro1EyaI5VnLy8XaHUlJomXt\n7t61865aJfaUDAw0fvZmfb1YV1yjafrMmDGivj59zDMqhWwKu0QU4L0T7yExR6w5/fSEpzHUy/j/\nkPUGPTJLMzG472Cjxon3KLW1YtZlv35ihEp3++gj4IcfxA+OP/+56YeGXi9mZpp7tiUpArtEFG7x\nyMXwc/ODr6tvp8IaEK3sYC8ZwkgJ1GoxzM9cDAaxCqC/v1h4qiOXL4s/y8vFRgcNgd3bxpcsIJOx\nhd1MdW014tLi4GDvgLuH3A17O3uznLesugzHs44jsG9gz5l+3hO9+67oFnFyEg8ePTzaPz4pSfSj\n33KLGOLHkSEEtrCNdjDtIA6mHQQAOKmdcGegkQvQd+Bvv/wNqYWpUNup8aL2RXj19jLLedtSL9Xj\nnyn/RHFVMeaGzIWns6dFr9etqqvFutL9+8vTjdGenBzxZ1WVGNPdUWBHRHTPbj1kMxjYzdirmlrU\najvz/dNUGMRymrX1taiqrWr32Nr6Wtir7Fs8IDTUGaC2Uxv90PDnrJ/x7eVvAYjwfnj0wyZWboU+\n+kisZmdnJ4bSBQTIXVGTRYuAAwfE2O/AQLmrIRvEwG5m5tCZcFI7wdHeEZMHTjbbeZdHLsd3l79D\nkGcQBvRpeys1Q50BCz5fAD83P2yZsQUqlQpVtVWY++lcjPIdhVfufMWo0HZ3cm/1tU0oKxOr/Z09\nKzYiaD7mWW7DhokvIgthH7aVaAjrL1LEziiP3fYYXr/rdcz7bB7i0uIAAM9Oetbo0E7JT0FRVRHG\nDRhntr74jpzNOwtHe8dOPxjtlNxcYO1asZJeQICY/DJ3ruWuR9TN2IfdigsFF7A7eTc0Lho8PPph\n2Ye8qVQqONg1Tcne+tNWbP1pa4tjHO0dW4T1mbwzOJB6AMFewZgzfA4uXr8IbxdveDp7IsS7e/cC\n/P7K99h7VmyBtXL0Sozqb4GlUQExZvnxx4Ft20S3SFiYZa5DZIV6bGB/ffFr5FbkIrciF8m5yRjt\n14VZbmagtlPj43kfAwA+PfvpTd9fe8darNOua/He3jN7ka/Px5XiK7hachUpBSlwVjvjhSkvWPzB\n5o1yynMaX+dW5Fr2YmFhwGuvidecuk09SI8dRxTkGQRAjAaxls1y1XZqvD/7/Va/t/aOmzc3GOAm\n+sN7O/RGYWUhAKCythJ5FXmWK7INM4fORIQmAmP9xmLKoCmWv6CbG8Oaepwe3Yf9a/GvcHdyR18n\nMy+paaKGB4wNfdbNPXbbY40PIhsY6gxIKUjBALcByKvIw+cpnyOgTwCWRCyBnUr8LE4vTsf+C/sR\n0CcAc4bPabf/O7c8Fx7OHrJ3DxH1ZNwiTAGqa6sx59M5rYZ1g8dvexybZ2zu1JogG45swJXiKwCA\n/xn/PxjWr/VRDHuS90D3qw7evb3x3OTn4Ozg3OpxPcr586Kf3NrGe5NN4xZhCuBg7wA/t6YdTdbe\nsRaGFwy4P+z+xvcGug/s9AJODVuPqe3U7f4mkZyXDADI1+e36I/usY4cEcuwvvmm2LWGyAr02IeO\n1sZOZYft0dsBAAF9AhofMDY8iBzjNwZPT3y60+ddFrEMEZoI9HfrD42rps3jZgTNwD/P/xNDPYdi\noPtAAGLPyvcT34eHkweeGP8E+vQychU6W5CT0/prIhmxS8TKSJJ0Uyu64b2iyiJcLbmKEO+Qbuln\nfufndxqXdV0yckmn1uJWvNJSsXO6nR2weLH5tiwj6gDHYStIa10eKpUKeoMeryS8grKaMoT0C8Hq\n8avbPEdtfS0Sfk2Ak9oJ4/3Hm7wO9kjNSJzOPY3eDr0x1LOHrc3cp49Y45rIijCwFaJQX4hLRZfg\n4uCCrLKsdo+NvRiLry9+DUCE/Xj/8SZdc9LASRjhMwK97HvxISSRFeBDxy66VnYNeoPe4tf5+uLX\nqDRU4lLRJcwMmtnusc0XmOposamO9HXqy7AmshImB3ZxcTHmz5+PkJAQhIaG4tixY+asSxH2nd+H\ndfHrsFa3FsVVxRa9Vl5FHmrrayFJUofPBqKDo3FL31ugcdFglK+FpogTUbczObCfeOIJzJo1Cykp\nKTh9+jRCQrp37QprcC7/HACgtLoUmaWZFr3WBP8J0Bv0cHdyx7HM9n84Zpdl43LxZeRW5GLPmT0W\nrYuIuo9JgV1SUoKEhASsWLECAKBWq+He1U1MFWhG0Ay493JHaL9QDPOy7LKakf0jMdpvNAb3HYz+\nbu0vJ6o36FEviZ3Ba+trLVqXItTXN+2UTqRgJg3rO3XqFP7rv/4LoaGhSEpKwujRo7F582b0brYX\nHYf1mV9ueS6ulV/DCJ8RrW6wEHsxFp8kf4Jr5dfQz7kfpgZOxYIRC6xm6r0sfv0V2LxZBPZ//7fY\njovIipl9WF9tbS0SExOxdetWjB07FqtXr8aGDRvw4osvtjhu3bp1ja+1Wi20Wq0pl6P/0Lhq2p38\nEnsxFpmlmciryIN/H3/c4nFLzw5rAEhMFBseAGKnGgY2WRmdTgedTmfUsSa1sHNycjBhwgRcuSLW\nqDhy5Ag2bNiAAwcONJ2YLexu996J9/CvK/9CRmkGJg2chGcmPgMfFyN277Zl6enApk2AJIl1tIOC\n5K6IqF0WWfxpypQpeP/99xEcHIx169ahsrISrzWsUdzBRZXq4vWLqKytRLhPuMmTUSypXqpHXkUe\nPJw8btrsoEerrRWB7eDQ8bFEMrNIYCclJeGhhx5CTU0NhgwZgp07d7Z48GhrgX069zS2/bwNADB3\n+FzMCJohc0VEZIssMjU9IiICP/egVcwK9AWNr/Mr8i16rZT8FNjb2SPYi8t6ElETTk030u0Bt+Nq\nyVXoDXrcE3yPxa7z71//jd3JuwEAD9/6MMb4jbHYtaze9euAWg30wCGjRK1hYBupl7oXHoh8wOLX\nabE3YrmF90a0ZidOAB98ANjbA08+CQwZIndFRLLrUYF9teQq3Bzd4OHsIXcpbbp7yN24rr8OtZ0a\n2sFaucv2XxV6AAAKAUlEQVSRT0pK04SX1FQGNhF60HrYBy8exL4L++Bo74hnJz3bYncXskIZGcB7\n7wFOTsCjjwKennJXRNQtuB42gLTCNABATV0NrpZcZWCbyQ9Xf0BsWizCvMOwKHyR+U4cEAC8/LL5\nzkdkA3rM8qozh86Er6svRniPQKRvpGx1XLx+ES//+2XsOLnDJtb5+OL8FyjQFyD+13hcK7smdzlE\nNq3HtLCDPIOwXrte7jKw7/w+ZJRmIKM0A6P7j0aEb4TcJXXJUM+hOJlzEv1694OnM7stiCypxwS2\ntRjUdxDSitLQy75Xh6vuKcEjox9BenE6fF190UvdS+5yiGxaj3noaC0kScLFwovwcvaCV28vucsh\nIitjkanpXbkoERG1rr3s7DEPHYmIlI6BTUSkEAxsIiKFYGATESkEA5uISCEY2ERECsHAtiHpxek4\nX3Be7jKIyEI409FGNN/CbEHYAkwNnCpzRURkbmxh24jmCy9ll2XLWAkRWQpb2DZi8qDJSC9OR1Vt\nFTcIJrJRnJpORGRFODWdiMgGMLCJiBSCgU1EpBAMbCIihWBgExEpRJcCu66uDqNGjUJ0dLS56iEi\nojZ0KbA3b96M0NBQqFQqc9VDRERtMDmwMzMzERsbi4ceeojjrYmIuoHJMx2ffPJJbNy4EaWlpW0e\ns27dusbXWq0WWq3W1MsREdkknU4HnU5n1LEmzXQ8cOAADh48iG3btkGn0+HNN9/EV1991fLEnOlI\nRNRpZp/pePToUezfvx+BgYFYuHAhDh8+jKVLl3apSCIial+X1xKJj4/HG2+8wRY2EZEZWHwtEY4S\nISKyPK7WR0RkRbhaHxGRDWBgExEpBAObiEghGNhERArBwCYiUggGNhGRQjCwiYgUgoFNRKQQDGwi\nIoVgYBMRKQQDm4hIIRjYREQKwcAmIlIIBjYRkUIwsImIFIKBTUSkEAxsIiKFYGATESkEA5uISCEY\n2ERECsHAJiJSCAY2EZFCMLCJiBSCgU1EpBAMbCIihTApsDMyMjB16lSEhYVhxIgR2LJli7nrIiKi\nG6gkSZI6+6GcnBzk5OQgMjIS5eXlGD16NPbt24eQkJCmE6tUMOHUREQ9WnvZaVIL29fXF5GRkQAA\nV1dXhISEIDs72/QKiYioQ+quniA9PR0nT57EuHHjbvreunXrGl9rtVpotdquXo6IyKbodDrodDqj\njjWpS6RBeXk5tFotnn/+ecyZM6flidklQkTUaWbvEgEAg8GAe++9F4sXL74prImIyPxMamFLkoRl\ny5bBy8sLb731VusnZgubiKjT2stOkwL7yJEjmDJlCkaOHAmVSgUAiImJwYwZM4y6KBERtc7sgd3V\nixIRUess0odNRETdi4FNRKQQDGwiIoVgYBMRKQQDm4hIIRjYREQKwcAmIlIIBjYRkUIwsImIFIKB\nTUSkEAxsIiKFYGATESkEA5uISCEY2ERECsHAJiJSCAY2EZFCMLCJiBSCgU1EpBAMbCIihWBgExEp\nBAObiEghGNhERArBwCYiUggGNhGRQjCwiYgUwuTAjouLw/DhwzF06FC89tpr5qzJKul0OrlLMBtb\nuhfAtu7Hlu4FsK37sYZ7MSmw6+rq8NhjjyEuLg7nzp3Dnj17kJKSYu7arIo1/I9lLrZ0L4Bt3Y8t\n3QtgW/djDfdiUmD/9NNPCAoKwuDBg+Hg4IAFCxbgyy+/NHdtRETUjEmBnZWVhYCAgMa/+/v7Iysr\ny2xFERHRzVSSJEmd/dDnn3+OuLg4bN++HQDw8ccf4/jx43j77bebTqxSma9KIqIepK1YVptysgED\nBiAjI6Px7xkZGfD39zfqgkREZBqTukTGjBmDixcvIj09HTU1Nfj0008xe/Zsc9dGRETNmNTCVqvV\n2Lp1K+6++27U1dXhwQcfREhIiLlrIyKiZkwehz1z5kxcuHABaWlpePbZZ1t8z1bGaGdkZGDq1KkI\nCwvDiBEjsGXLFrlL6rK6ujqMGjUK0dHRcpfSZcXFxZg/fz5CQkIQGhqKY8eOyV1Sl8TExCAsLAzh\n4eFYtGgRqqur5S7JaCtWrIBGo0F4eHjje4WFhYiKikJwcDCmT5+O4uJiGSvsnNbu55lnnkFISAgi\nIiIwb948lJSUdHtdZp/paEtjtB0cHPDWW2/h7NmzOHbsGLZt26bYe2mwefNmhIaG2sRD4SeeeAKz\nZs1CSkoKTp8+rejf8tLT07F9+3YkJiYiOTkZdXV12Lt3r9xlGW358uWIi4tr8d6GDRsQFRWF1NRU\nTJs2DRs2bJCpus5r7X6mT5+Os2fPIikpCcHBwYiJien2uswe2LY0RtvX1xeRkZEAAFdXV4SEhCA7\nO1vmqkyXmZmJ2NhYPPTQQ4p/KFxSUoKEhASsWLECgOimc3d3l7kq0/Xp0wcODg7Q6/Wora2FXq/H\ngAED5C7LaJMnT4aHh0eL9/bv349ly5YBAJYtW4Z9+/bJUZpJWrufqKgo2NmJyBw3bhwyMzO7vS6z\nB7atjtFOT0/HyZMnMW7cOLlLMdmTTz6JjRs3Nv6fTsmuXLkCb29vLF++HLfeeisefvhh6PV6ucsy\nmaenJ5566ikMHDgQfn5+6Nu3L+666y65y+qS3NxcaDQaAIBGo0Fubq7MFZnPjh07MGvWrG6/rtn/\ny7WFX7VvVF5ejvnz52Pz5s1wdXWVuxyTHDhwAD4+Phg1apTiW9cAUFtbi8TERDz66KNITEyEi4uL\non7lvtGlS5ewadMmpKenIzs7G+Xl5di9e7fcZZmNSqWymWx45ZVX4OjoiEWLFnX7tc0e2MaM0VYS\ng8GAe++9F4sXL8acOXPkLsdkR48exf79+xEYGIiFCxfi8OHDWLp0qdxlmczf3x/+/v4YO3YsAGD+\n/PlITEyUuSrTnThxAhMnToSXlxfUajXmzZuHo0ePyl1Wl2g0GuTk5AAArl27Bh8fH5kr6roPP/wQ\nsbGxsv0wNXtg29IYbUmS8OCDDyI0NBSrV6+Wu5wuefXVV5GRkYErV65g7969uPPOO/HRRx/JXZbJ\nfH19ERAQgNTUVADAoUOHEBYWJnNVphs+fDiOHTuGyspKSJKEQ4cOITQ0VO6yumT27NnYtWsXAGDX\nrl2KbvAAYvTbxo0b8eWXX8LJyUmeIiQLiI2NlYKDg6UhQ4ZIr776qiUu0S0SEhIklUolRURESJGR\nkVJkZKR08OBBucvqMp1OJ0VHR8tdRpedOnVKGjNmjDRy5Ehp7ty5UnFxsdwldclrr70mhYaGSiNG\njJCWLl0q1dTUyF2S0RYsWCD1799fcnBwkPz9/aUdO3ZI169fl6ZNmyYNHTpUioqKkoqKiuQu02g3\n3s8HH3wgBQUFSQMHDmzMglWrVnV7XSatJUJERN1P+cMFiIh6CAY2EZFCMLCJiBSCgU1EpBAMbCIi\nhWBgExEpxP8D6dvukNQMFaYAAAAASUVORK5CYII=\n"
}
],
"prompt_number": 29
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Once the algorithm has converged, we calculate the misclassified errror has :"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import itertools\n",
"import operator\n",
"\n",
"def find_minimal_error_and_permutation(real_a_n, estimated_a_n, m):\n",
" return sorted([{'permutation':np.array(p), 'error':100 * np.average(real_a_n != np.array(p)[estimated_a_n])} for p in itertools.permutations(range(m))],\n",
" key=operator.itemgetter('error'))[0]\n",
"\n",
"error_and_permutation = find_minimal_error_and_permutation(real_a_n=assignements_n, estimated_a_n=centroids_a_n, m=m)\n",
"print 'Minimum error of {error}% with re-ordering {permutation} of clusters'.format(**error_and_permutation)\n",
"estimated_probs_m = np.array([np.sum(centroids_a_n==k,dtype='float') for k in range(m)])[error_and_permutation['permutation']] \n",
"print 'The estimated probabilities of each cluster are : {}'.format(estimated_probs_m)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Minimum error of 16.0% with re-ordering [0 2 1] of clusters\n",
"The estimated probabilities of each cluster are : [ 40. 27. 33.]\n"
]
}
],
"prompt_number": 30
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The Expectation-Maximization algorithm\n",
"--------------------------------------\n",
"\n",
"EM is an general purpose optimization method for deriving hidden parameters of probabilistic models. It is formulated as to find the maximum likelihood estimator of parameters of a probability distribution which is partially observed. It fits well to model problems with hidden or latent variables. The term was coined by Dempster, Laird and Rubin in [their original paper](http://web.mit.edu/6.435/www/Dempster77.pdf) and it is widely used in the context of [mixture models](http://en.wikipedia.org/wiki/Mixture_model). The paper [\"What is the expectation maximization\n",
"algorithm?\"](http://ai.stanford.edu/~chuongdo/papers/em_tutorial.pdf) is a nice introduction to it.\n",
"\n",
"Our toy problem actually fits well that framework if we write :\n",
"\n",
"- $p = \\big(\\mu^{1} = (\\mu^{1}_1, \\ldots, \\mu^{1}_d), \\ldots, \\mu^{m}, \\pi_1, \\ldots, \\pi_m, \\Sigma_1, \\ldots, \\Sigma_m \\big)$ the parameters of our mixture of gaussian model;\n",
"- $(a_i)_{i=1,\\ldots, n})$ the hidden or latent assignements of points to gaussians;\n",
"\n",
"The EM algorithm decomposes itself in two steps :\n",
"\n",
"- The E or Expectation step : estimation of the latent variables behind the observed sample. In our example, those variables are the assignements of the $i=1,\\ldots,n$ points to one of the $k=1,\\ldots,m$ components, whom we estimate the probabilities $P[A = k | x = x_i] = P[a_i = k | x_i]$.\n",
"\n",
"\n",
"- The Maximization step : estimation of parameters $p$ through the maximum likelihod estimator. In the gaussian case, the maximum likelihood estimator of $p$ has the nice property to have two independent closed forms :\n",
"<center> $\\pi_k = \\frac{1}{n} \\sum \\limits_{i=1}^n \\ p[a_i = k]$ and $\\mu^{k} = \\frac{\\sum \\limits_{i=1}^n p[a_i = k] x_i}{\\sum \\limits_{i=1}^n p[a_i = k]}$</center>\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here are the details for calculating the assignements probabilities $P[a_i = k|x_i]$ : \n",
"\n",
" - Knowing that $P[x_i | a_i = k ] = \\frac{P[a_i = k, x_i]}{P[a_i = k]} $ and $P[x_i] = \\sum \\limits_k^m P[x_i | a_i = k ]$\n",
" - And that $ P[a_i = k] = \\pi_k$ and $ P[x_i | a_i = k ] = \\text{pdf}(x_i; \\mu_k, \\Sigma_k^{-1}) \\propto \\frac{1}{|\\Sigma^k|} \\text{exp} \\Big( - \\frac{1}{2}(x_i - \\mu^k)^{T} \\Sigma_k^{-1} (x_i - \\mu^k) \\Big)$ \n",
"\n",
"<center> $P[a_i = k | x_i] = \\frac{P[a_i = k, x_i]}{P[x_i]} = \\frac{P[x_i | a_i = k ] P[a_i = k]}{\\sum \\limits_k^m P[x_i | a_i = k ]} = \\pi_k \\ \\frac{1}{|\\Sigma^k|} \\text{exp} \\Big( - \\frac{1}{2}(x_i - \\mu^k)^{T} \\Sigma_k^{-1} (x_i - \\mu^k) \\Big) \\frac{1}{\\sum \\limits_k^m \\ldots} $ </center>\n",
"\n",
"It is proven that the EM algorithm converges to a local maxima of the likelihood $L(x_1, \\ldots, x_n; p) = \\Pi_i^n P[x_i]$\n",
"\n",
"<center> $\\text{ln} L(x_1, \\ldots, x_n; p) = \\sum \\limits_i^n \\sum \\limits_k^m \\frac{\\pi_k}{|\\Sigma_k|} \\text{exp} \\Big( - \\frac{1}{2}(x_i - \\mu^k)^{T} \\Sigma_k^{-1} (x_i - \\mu^k) \\Big)$ </center>\n"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def em_iteration(x_n, centroids_mu_m, prob_m, cov_m):\n",
" # cov_m.shape = (m) => spherical covariances \n",
" # E-step\n",
" dist_n_m = np.transpose([np.sum(np.abs(x_n - centroids_mu_m[k,:])**2, axis=1) / (2 * cov_m[k]) for k in range(m)])\n",
" pa_n_m = np.exp(-dist_n_m) * prob_m / cov_m[k]\n",
" pa_n_m = (pa_n_m.T / np.sum(pa_n_m, axis=1)).T\n",
" # M-step\n",
" prob_m = np.mean(pa_n_m, axis=0)\n",
" centroids_mu_m = np.array([np.sum(x_n.T * pa_n_m[:,k], axis=1) / np.sum(pa_n_m[:,k]) for k in range(m)])\n",
" return centroids_mu_m, prob_m, pa_n_m, dist_n_m"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 31
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Initialisation :"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"step = 0\n",
"\n",
"centroids_mu_m = x_n[np.random.choice(n, size = m, replace=False),:]\n",
"estimated_prob_m = np.repeat(1.0/m, m)\n",
"old_pa_n_m = pa_n_m = np.repeat(1.0/3.0, n * m).reshape(n,m)\n",
"sigma_m = np.repeat(1, m)\n",
"dist_n = np.repeat(np.infty, n)\n",
"\n",
"plt.scatter(x = x_n[:,0], y = x_n[:,1], s = 10, c = 'k', marker = 'o', faceted=False, alpha=.6)\n",
"plt.scatter(x = centroids_mu_m[:,0], y = centroids_mu_m[:,1], s = 50, lw = 3, c = colors_m, marker = 'x', faceted=False);\n",
"plt.xlim([0, max(plt.xlim()[1], plt.ylim()[1])]);plt.ylim([0, max(plt.xlim()[1], plt.ylim()[1])]);"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "display_data",
"png": "iVBORw0KGgoAAAANSUhEUgAAAWwAAAD5CAYAAAADQw/9AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XdYVGe+B/AvQxEF6TBUFQtKE7DEio6xm+Ba2I1yLZHV\nZHWTVTc9MQlmVcyaxGhictN0NSG6uTGiaxCjq0MwdlFERBSVANJFpAzS5tw/iBNR6jDD4Qzfz/Ps\n8+DhzDm/s7t8eXnPW4wEQRBAREQdnkzsAoiIqGUY2EREEsHAJiKSCAY2EZFEMLCJiCSCgU1EJBEm\n+rqwkZGRvi5NRGTQGhttrdcWtiAIBvOft99+W/Qa+CyG/zyG9CyG9jzt9SxNYZcIEZFEMLCJiCSC\ngd1CCoVC7BJ0xpCeBTCs5zGkZwEM63k6wrMYCc11mmh7YSOjZvtjiIiovqayky1sIiKJYGATEUkE\nA5uISCIY2EREEsHAJiKSCAY2EZFEMLCJiCSiycAODw+HXC6Hv7+/5thLL70Eb29vBAQEYNasWbh7\n967eiyQiomYCe9GiRYiNja13bNKkSUhOTkZiYiK8vLwQGRmp1wKJiKhOk4EdHBwMW1vbescmTpwI\nmazuY8OGDUNWVpb+qiMiIo029WFv3boV06ZN01UtRETUBK03MFi7di3MzMwQFhbW6DkRERGarxUK\nRYdYPIWIqCNRKpVQKpUtOrfZxZ/S09MREhKCpKQkzbF//etf+OKLL/Df//4X5ubmDV+Yiz8REbWa\nThd/io2NxYYNG7B3795Gw5qIGpeWlob9+/ejsLBQ7FJIYppsYc+dOxdxcXEoLCyEXC7H6tWrERkZ\niaqqKtjZ2QEARowYgU8++eTRC7OFTfSIkpISvP7666iuroaLi0u9bkMioOnsbLIPe+fOnY8cCw8P\n101VRAYgOjoaR44cwZAhQ7BgwYJmz6+trUVtbS0AoKqqSt/lkYHhBgZEWhIEAcuWLYNarQYAbNiw\nAVZWVs1+7vz587h8+TLGjBkDDw8PfZdJEqN1C5uIGmdkZISBAwfiwoUL6N27NywtLVv0uaCgIAQF\nBem5OjJEbGETtYEgCCgsLISdnR2MjY3FLocMQFPZycAmIupAuKcjEZEBYB82SdKdO3ewY8cOGBsb\nY+HChejevbvYJRHpHVvYJEk//fQTLl++jKSkJMTFxYldDlG7YGCTJLm7u2u+dnNzE7ESovbDl44k\nWdevX4exsTF69eoldilEOsNRIkREEsFRIkREBoCBTUQkEQxsImrQvXv3EBMTgxMnTohdCv2G47CJ\nqEHff/894uPjAQDdunVDQECAyBURW9hED1GpVCgoKBC7DNHV1NQ0+DWJhy1sogcUFhZi3bp1KC8v\nR2hoKCZOnCh2SaL54x//iO7du8PGxgaDBg0SuxwCW9hE9dy4cQPl5eUAgEuXLolcjbgsLCwwe/Zs\njB8/HkZGRmKXQ2BgE9Xj7++Pvn37wsrKStO6rq6uFrkqojqcOEPUCEEQ8OmnnyIxMRHBwcGYN2+e\n2CVRJ8CJM0RaKCkpQWJiIgDgl19+aVUDpLy8HNnZ2foqjTopBjZRI6ysrODn5wcAGDZsWIv7ce/c\nuYO33noLq1evRnR0tD5L1Lh58yaOHTuGysrKdrkfiYOjRIgaYWRkhOeffx4VFRXo2rXroycIAvBw\niAsCMjMzUVZWBgC4cuWK3uvMzc3Fhg0bUFtbi0uXLuEvf/mL3u9J4mALm6gZDYb1sWPAmDHA7du/\nHzt0CHj8cXi7usLPzw+2traYOnWq3usrLS1FbW0tAKC4uFjv9yPx8KUjUWsdOwZMnQqUlQEBAcB/\n/wskJADTpwP37gHDhgEHDwLW1u1W0oEDB5CRkYEnnnii3lrhJD1NZSe7RIha6+ZN4Lex2khMBBwc\n6n8/Oxu4c6ddA7s9WvIkPnaJELXW/PnAtm0QGnoJ6eEBKJUAN1UgPWgysMPDwyGXy+Hv7685VlRU\nhIkTJ8LLywuTJk1inxl1TgsXojQk5NHju3cDvXu3fz3UKTQZ2IsWLUJsbGy9Y+vXr8fEiRNx9epV\njB8/HuvXr9drgUTtLT8/v/kx1IcOoftPPz16fMmS+i8iiXSo2ZeO6enpCAkJQVJSEgBgwIABiIuL\ng1wuR25uLhQKRYNDl/jSkaQoJSUFmzdvhlqtRnh4OIYNG/boSYcO/f6CsSH3X0Ta2+u3WDJIOn3p\nmJeXB7lcDgCQy+XIy8tr9NyIiAjN1wqFAgqForW3I2pXN27cgFqtBgCkpaU1HNh37wL31xf5rc+6\nYM8e2L/4ImQAKgoL0bWiov2KJklTKpVQKpUtOrdNo0SMjIyanP31YGATScGoUaNw/vx5pKWlwcPD\no973SktLYWpqCvPQUGDXLuCVV4BDhyB49kZSUBB+VSgw9fx5HFm+HHPd3NHQT4YgCFz5jup5uDG7\nevXqRs9t9SiR+10hAJCTkwMnJ6fWV0jUQdnY2KB79+7o0qULdu7ciZycHADA2bNn8fLLL+PVV19F\nVlYWEBoKXL6MSrfemD0byM0dgYKpU/HZsmUIevJPCAmpG4r9oO+//x7Lli3D559/zu5C0kqrA3v6\n9OnYvn07AGD79u2YMWOGzosiEtP99TjUajWqqqoAAImJiVCr1aioqNC8s6lEF4SGAnv2AAsXdkW/\nfi/jpVXr8NxzPfHjj8Af/lA/tI8cOQK1Wo1z5861anRVfn4+3nvvPXz88ceatbqpc2oysOfOnYuR\nI0ciNTUVHh4e2LZtG1599VUcOnQIXl5eOHLkCF599dX2qpWoXTz99NMIDg7G/Pnz0bNnTwBAcHAw\nLCwsIJfLNbuvFBcDqal1n6mpAWbNAiwsgCNH6o5VVgIXL/5+3SFDhgAAvLy8YN3EpJrq6moUFBRo\nWuEHDx7EtWvXkJSUhGPHjun4aUlKODWdqA2yswGFArh27dHvrVsHvPba7/8WBAGlpaWwtLSETNZw\nW6myshKRkZHIycnB6NGjMX/+fMTFxeHbb7+FTCbD888/Dx8fH/08DHUInJpOpCeurkBMDNCvX/3j\njz1WP6yBuh9EKyurJq9XUFCg6Te/+FvzfOzYsfDw8ICZmRnXCenkODVdD65cuYJ//vOf+P777/lX\nhoFTqYBnn330eEJCXd92a7m6umLQoEEwNzfH5MmTNcd79+7NsCZ2iejD6tWrNTPlXn75ZfTp00fk\nikgfVCogJOT3PuuHmZgA330HzJzZvnWRtHGLsHbm6uoKADA3N4ednZ3I1ZC+qFRAQcHv/163Drh1\n6/fukZqauj7u1rp16xYuXLigmcBDdB9b2HpQU1OD5ORkuLq6wtHRUexySI8KCoDx44G5c3/vs87O\nBsaNA55/HnjuudZdLzs7G2vWrEFtbS3Gjh2LsLAw3RdNHRpfOrYzExMTBAQEiF0GtQNHR+DUKeDB\nTWlcXYELF+ofA+rGU3/33XewtrbGnDlzYGpq+sj1CgoKNLvH3J+gRnQfA5uoBfLz83H8+HEMGDAA\nAwYMqPe9hnYQa+jY3r17NYuoeXp6YvTo0Y+c4+/vD4VCgZycHISGhuqkdjIcDGyiFvjkk0+Qk5OD\nQ4cOYe3atbCxsWn1Ne4vmlZTU4MuXbo0eI5MJsPcuXPbVCsZLgY2SUZmZiZ27doFOzs7LFiwoMEu\nBX25300hCEKz72YqKyvx9ddf4+7du1i6dCm6desGAAgJCYGpqSl27dqFrVu3QhAEPPbYY3qvnQwH\nA5skY9++fUhLSwMA+Pr6Yvjw4e1276VLlyI+Ph7e3t6wtbVt8tyoqChs3LgRgiAgKysLGzduBFD3\nMsnExATm5uZQq9W4dOlSg4GdlpaGGzduYPjw4c1OtKHOhcP6OpCamhps3rwZzz//PI4ePSp2OR3O\n/YkjxsbGcHZ2btd7u7q64qmnnsLAgQObPbeyslLTClepVPW+N2TIELi5ucHGxqbB9eFv376NDz74\nALt378Znn32mk9rJcLCF3YFkZGQgOTkZAHD48GGMGzdO5Io6jurqaoSEhMDT0xP/93//hw0bNuCp\np57CmDFjxC7tEYsXL8bt27dRVFSElStX1vuera0t3nrrrUY/W1lZqel+qeAmCPQQBnYH4urqqtnF\nh8MCgfLycvz73//GjRs3kJeXBycnJ8yYMQP5+fkAgKNHj7Y4sG/cuIHvvvsOzs7OmDdvHkxM9Pd/\nfVNTU6xatUqrz7q6umLBggW4du0aJkyYoOPKSOoY2B2Iubk53nzzTZSUlMCe+wHi4MGDOHXqFC5e\nvAhra2vIZDJUVFTAyckJ+fn5mmVOWyI6Oho3b97EzZs3ERAQgKCgID1Wrr37XSlubm6aUSWtERsb\nixMnTmDUqFGYNGmSrssjkTGwOxhTU1OG9W/u//cgl8thZGQEBwcHBAQEYOTIkVCpVK16Iefh4YHU\n1FSYmpq2e/93a5w6dQo7duwAAJSVlWFmKxYiuXfvHvb8tuLUDz/8gHHjxrXrSBrSv04R2FeuXMGh\nQ4fg6+uLxx9/XOxyqIXGjh0LGxsbGBsbo3///jAxMdHsh9ja0ROhoaHw8/ODvb19i7e1q66uRl5e\nHlxcXGBsbKw5rlKpcPToUTg6Oup8WF71/c19Ac1uNy3VpUsXuLm54datW/Dw8GBYG6BOsZbIK6+8\notmS6R//+Af3oaRmqdVqREZGIiMjA97e3lixYoXme1999RVOnz4NAFi5cuUjMx/bet+DBw9CpVJh\n2rRp6NrQlMkm3Lt3DxkZGejZs2ejk3OoY+v0a4nY29ujuLgYXbt21UxiIMOSlZWFlJQUDBo0SCdd\nSuXl5cjIyAAApKam1tvtvKamRnPeg1/rgkwmw9SpU7X+vLm5Oby8vHRYEXUknaKFXV5ejsTERPTp\n00erFznUsd27dw+vvvoqKioq4OjoiDVr1ujkut999x3OnDkDhUKBJ554QnO8pKQEBw8ehJOTE8aO\nHas5LggCtm/fjqSkJDzxxBPsfiOtNJWdnSKwybCVlZXhpZdeglqthoWFBT744ANR6sjJyUFERAQA\niFoHSVun7xIhw2ZpaYnFixfjwoULCA4OFq0Oe3t7ODs7Izc3lxvlkl6whU2kQ5WVlSgsLISrq6um\nz5uoNdglQkQkEdzTkfRCrVa36y/loqIi3Llzp9nzKisrRd8PsaioCGfOnEF5ebmodZBhYR82aeX6\n9ev46KOPYGxsjJUrV2pW0tOXixcv4tNPP4WRkRGee+65RvuIjx07hqioKNjb2+OVV15B9+7dG71m\ndXU17ty5A0dHR512X1RWViIyMhIlJSXo0aMH3njjDZ1dmzo3rVvYkZGR8PX1hb+/P8LCwlBZWanL\nuqiDO336NCoqKlBWVoaEhAS93+/KlStQq9Wora1Fampqo+edOHECarUaBQUFuHbtWqPnVVVVYd26\ndXjzzTc1U8F1paKiAiUlJQCgWaiqKaWlpfjmm2+wd+9ezV8Gx44dw4cffohz587ptDaSNq0COz09\nHV988QUSEhKQlJSE2tpa7Nq1S9e1SdLVq1exb98+FBQUiF2KXgUFBcHMzAxdu3Ztl5UFg4OD4ezs\nDFdXV4waNarR80aOHAmZTAYnJyf069ev0fMKCwuRnZ0NoK71/rC2dKnY2NjgqaeeQv/+/bFw4cJm\nz4+OjkZ8fDxiYmJw4sQJqFQqREVFISUlBdu2bRO9e4c6Dq26RKysrGBqagqVSgVjY2OoVCq4ubnp\nujbJuXv3LjZt2oSamhqcP38eb7/9ttgl6c2AAQPw3nvvQSaTNbhmRWFhIbZt2wZTU1OEh4e3eecU\nFxcXrF69utnzRo0ahccee6zeuiMNcXZ2xuDBg5GcnIwpU6ZojpeXl+O9995DXl4e5s+fjxEjRmhV\n7+OPP97iiTMPzr7t1q0bzMzMYGVlheLiYtjZ2UEm46smqqNVYNvZ2eGFF15Ajx490LVrV0yePLnB\ntXvvTyIAAIVC0eAOG4ZErVZrWkMPLuJjqJpaq+LQoUOa7bzi4+PrzRRUq9V6DaGHf4FUV1dj3759\nqK2txfTp02Fubg6ZTIZnnnnmkc9evXpV0/KOj4/XOrAfJggCTp48CQAYPnx4vV8mf/jDH+Do6AhL\nS0v06tULCQkJ+Otf/4qcnBx4e3vr5P7UcSmVSiiVyhadq1VgX79+HR9++CHS09NhbW2NP/7xj4iK\nisL//M//1DvvwcDuDGxtbfHss88iJSWl2QkcW05vwUiPkQhy+X1d5vePv49p/abB21H6P6S9evUC\nUDdEqUePHprj3377LeLi4jBo0CA888wzehmrXFtbq9nwwMTEBEeOHMFPP/0EADAzM8OMGTMa/Wyf\nPn1gb2+PoqIiDB06VGc1KZVKTbdhRUVFvda3iYkJxowZg+rqaqxatQrFxcVwcXHpdD8/ndXDjdmm\n/pLUKrDPnj2LkSNHahbZmTVrFo4fP/5IYHdGgYGBCAwMbPKcD058gBd+egF2Xe1weP5hBLkEYbVy\nNSLiIrDh+AYcXXhU8qE9YsQIuLq6wsTERNNdJggCfv75ZwBAQkICSktL9bLJ7ObNm3HlyhV4enri\nlVdeqbfiXXOr31lZWeGdd95BZWUlLCwsdFbTg3s7NjbUr6qqCnfv3gVQ16X04IJTRICWgT1gwAD8\n4x//QEVFBczNzXH48GGdrwtsqArKC7Dm57rFiYoqijDh6wkY03MMoq9EAwDyyvMQeSwSO2bqduSC\nGHr27Fnv30ZGRhg6dChOnz4NHx+fJofcaau2thZXrlwBANy8eRMqlQrBwcEwMzNDTU0NRo4c2ew1\nTExMdL6F2IQJE6BSqSAIQqM7wVhYWCAsLAxnz57FmDFjGNb0CK1nOv7zn//E9u3bIZPJMGjQIHz5\n5Zf1+g47w0xHQRBw69YtODo6tnjt4ZKSEixfvxzfmn6LKtmjC9RP7jMZ0XOiYW5irutyO4zy8nJ0\n69at0UBqa8ty3759iIuLw7Bhw/CnP/1Jq2vcu3cPaWlpsLe3xzfffIOKigqEh4frfbw5Eaem68mX\nX36JM2fOQC6XY9WqVTAzM2v2M+fOncPnn3+OAtMC7HHcU+97ThZO+HXFrwYd1k0pKSnB+++/j6Ki\nIoSHh4u67+LatWuRkZGB0tJSWFhYQCaTYcSIEXj66adFq4k6B05N15PLly8DAPLy8nD79u0WfaZ/\n//5wdnZGZtfMR75XVlWGlIIUndYoJcnJycjNzUVVVRWOHTsmWh1qtRq3bt0CgHo/OE2N626pc+fO\nQalU6nzjA+ocODW9DUJCQhATEwNfX98Wb+xqaWkJ2TgZzsadfeR7qmoVJnw9QfMisrPx8vKClZUV\nysrKWrUjelPOnDmDvXv3ol+/fliwYEGLulpkMhnCwsIQFxeHoUOHYvDgwaisrISrq2ur769Wq1FY\nWAh7e3skJibi888/BwDcvn0bs2fPbvX1qHNjl0g7KygvgO8nvihQ1c2EnNxnMt4c8yae3Pkkiu/V\n7Tu5KHARtv5hq5hliqa6uhpVVVU6G6HxxhtvoLCwEADw2muvaYYbtpePPvoIly5dQt++fTFixAh8\n/fXXAIDRo0dj/vz57VoLSQO7RDoQRwtHHF14FI7dHDUvGEf1GIXD8w/DxtwGMwfMxP8++b9ilyka\nU1NTnQ6n69OnDwDA2toajo6OAIDs7Gxcv35dZ/cA6ibcfPzxxzh8+LDmWG1tLS5dugQASEtLQ0BA\nAJ588kmMHTu2ybHgnIpOjWELWyRpRWlwt3Kv94IxtTAVnraeMDNu/uVlS/3nP//B2bNnoVAoMG7c\nOJ1dVyrUajXS09Mhl8thYWGBq1evYuPGjVCr1ZgyZQoKCwvh4OCAGTNmtGlkyuuvv655jxEREQEX\nFxcAwO7duzUjVloyT+HcuXPYunUrHBwc8OKLL+pl6CN1bNwirAPqa9f3kWP9Hfo3eO7du3fx008/\nwcXFBaNHj27xPUpLS7F//34AdRvKKhSKTje2VyaToXfv3pp/Z2ZmalqwP/zwg2Ydjx49emDw4MFa\n38fe3h63b9+Gubl5vb8QZs+e3aq+6vj4eNTU1CA3NxcpKSmc30D1MLAlICoqComJiQAABwcHDBgw\noEWf69atm2aPQU9Pz04X1g0ZMWIEUlNTUVZWBktLSyQmJsLIyAjW1tZtuu7SpUtx4cIFeHp6tnj2\nplqtxvXr1yGXyzWfGTJkCFJTU2FlZaWTUSlkWNglIgGfffaZZs3pF198sVU/yCqVCllZWejVq1eL\nxol3JjU1NTh9+jQcHBzg5eXV7vffsWMHfvnlF1haWuKtt97S/NJQqVQwMzPT+WxLkgZ2iUjcvHnz\n4OrqCmdn51a3urp16yZKGEmBiYlJi6aqt1R1dTWSkpLg7u4OJyenZs+/ceMGAKCsrAz5+fmawH5w\nuVWiB7GF/YDKykrExsbC1NQUkydPhrGxsU6uW1pailOnTsHT01MzaoEMz6effooLFy7A3NwcERER\nsLW1bfL8xMRE/PDDD+jduzfmz5/Pda8JAFvYLXbgwAEcOHAAAGBubt7iBeib8/nnn+Pq1aswMTHB\nO++8o1nlUF/UajX27NmD4uJizJw5E3Z2dnq9X3uqrKzEyZMn4eLi0uH+csjNzQVQtw5JcXFxs4Ed\nEBDQLrv1kOFgYD/gwRa1LvsP7y+nWVNTg3v37unsuo05c+aMZv1ntVqNJUuW6P2e7WXHjh04e/Ys\nZDIZXn/9dXh4eIhdkkZYWBj279+PPn36wNPTU+xyyAAxsB8wdepUmJubw8zMrNkNCFpj0aJFOHTo\nEPr27dsuW6k9OOKhraMfOprS0lKUl5cjOTkZa9asqTfmWWz9+/dH//4ND80k0gX2YRuolJQU3Llz\nB8OGDdNZX3xzkpOTYWZmptfhaHl5eXj77beRkZEBDw8PTJkyBTNnztTb/YjaG5dXbUBqaiqioqIg\nl8uxZMkSSQ55u3TpEvbv3w8vLy/MmDED165dg6Ojoyh91kePHtVsgfWXv/xFr0ujJicnY8uWLZDJ\nZPjb3/7W4fqyidqCLx0b8OOPPyIvLw95eXlISkpq0yw3sezatQsFBQW4efMmMjIykJKSgq5du+LN\nN9/U+4vNh91/4QbUtYL1ydfXF++++y4AcOo2dSqdNrD79u2L1NRUmJubS3YXETc3NxQUFKBbt24o\nKioCULfBa35+frsH9tSpU3Hnzh2YmZlhzJgxer8fg5o6o07bJQIAv/76K6ytrWFjYyN2KVqprq5G\nSkoK3NzckJ+fj927d8PDw6PemN709HTs27cPHh4ezS5wlJeXB1tbW0l2DxEZCvZhd2Lr16/HzZs3\nAQB///vfGx3FsHPnTiiVSjg6OuKNN95odnfxzuDKlSuQyWTsI6d2xfWwOzEHBwcAdePKm/pLIikp\nCQBQUFBQrz+6szp27Bg2btyI999/H2fOnBG7HCIAnbgPu7NYuHAhAgIC4OLiArlc3uh5U6ZMwZ49\ne9CvXz/06NEDQN2elV9++SVsbW2xfPnyFq9CZwge/KXFX2DUUbBLRELu3LmDjIwMeHt7t0s/8yef\nfKJZ1nX+/PmtWotb6kpKShAVFQWZTIZ58+bpdBccoqZwWJ8BUKlUWLt2LUpLS+Ht7Y0VK1Y0em5N\nTQ3i4+Nhbm6O4cOHa70O9sCBA3Hx4kV069at063NbGVlhaVLl4pdBlE9DGyJKCoqwvXr12FhYYFb\nt241eW5MTAx+/PFHAHW/rYcPH67VPUePHg0/Pz906dKFLyGJOgC+dGyjnJwcqFQqvd/nxx9/REVF\nBa5fv46pU6c2ee6DC0y1dbEpGxsbhjVRB6F1YBcXFyM0NBTe3t7w8fHByZMndVmXJERHRyMiIgJv\nv/02iouL9Xqv/Px81NTUQBCEZt8NhISEoHfv3pDL5XqdIk5E7UvrwF6+fDmmTZuGlJQUXLx4Ed7e\n3rqsSxIuX74MoO4FVVZWll7vNWLECKhUKlhbWzf7yzE7Oxs3btxAXl4edu7cqde6iKj9aBXYd+/e\nRXx8PMLDwwHUjfE1tGU8W2LKlCmwtraGj4+P3pfVDAwMxODBg9GrV69mlxNVqVSancFramr0WpcU\nqNVqzX8fRFKm1bC+Cxcu4Nlnn4WPjw8SExMxePBgbNq0qd5edBzWp3t5eXnIycmBn59fgxssxMTE\n4Ntvv0VOTg4cHBwwbtw4zJkzR7JT73Xh119/xaZNm6BWq/G3v/0NvXv3FrskoibpfFhfTU0NEhIS\n8PHHH2Po0KFYsWIF1q9fj3feeafeeREREZqvFQoFFAqFNrej38jl8iYnv8TExCArKwv5+flwd3dH\n7969O3VYA0BCQoJmx5+zZ88ysKnDUSqVUCqVLTpXq8B2d3eHu7s7hg4dCgAIDQ3F+vXrHznvwcAm\n/fP390d+fj4qKyvh5OSEwMBAsUsSXVBQEOLi4iAIAgYNGiR2OUSPeLgxu3r16kbP1Xqm45gxY/Dl\nl1/Cy8sLERERqKio0KxRDBhml8i1a9dQUVEBf39/rSej6JNarUZ+fr5mxb2OWKMY7o+uMTU1FbsU\nombpZbW+xMRELF68GFVVVejTpw+2bdtW78WjoQX2xYsXsWXLFgDAzJkzMWXKFJErIiJDpJep6QEB\nAZ1qFbPCwkLN1wUFBXq9V0pKCoyNjbmsJxHVw6npLTRq1ChkZGRApVLhiSee0Nt9fv75Z0RFRQEA\nlixZgiFDhujtXh3d7du3O+2QUaKGMLBbqEuXLnj66af1fp/23BuxIzt79iy++uorGBsbY+XKlejT\np4/YJRGJrlMFdkZGBrp37w5bW1uxS2nU5MmTNS3LzjwMMiUlRTPh5erVqwxsInSi9bAPHDiA6Oho\nmJmZ4bXXXoOrq6vYJVETMjMz8dlnn8Hc3BzLli2DnZ2d2CURtQuuhw0gLS0NAFBVVYWMjAwGto78\n8ssviImJga+vL8LCwnR2XQ8PD6xZs0Zn1yMyBJ1medWpU6fC2dkZfn5+ok4ouXbtGtasWYOtW7ca\nxDofP/zwAwoLCxEXF4ecnByxyyEyaJ2mhd23b98mZxC1l+joaGRmZiIzMxODBw9GQECA2CW1Sb9+\n/XD+/HnR7yyJAAAJN0lEQVQ4ODiw24JIzzpNYHcUPXv2RFpaGrp06dLsqntS8MwzzyA9PR3Ozs7o\n0qWL2OUQGbRO89KxoxAEAdeuXYO9vT3s7e3FLoeIOhi9TE1vy02JiKhhTWVnp3npSEQkdQxsIiKJ\nYGATEUkEA5uISCIY2EREEsHAJiKSCAa2AUlPT8eVK1fELoOI9IQzHQ3Eg1uYzZkzB+PGjRO5IiLS\nNbawDcSDCy9lZ2eLWAkR6Qtb2AYiODgY6enpuHfvHjcIJjJQnJpORNSBcGo6EZEBYGATEUkEA5uI\nSCIY2EREEsHAJiKSiDYFdm1tLYKCghASEqKreoiIqBFtCuxNmzbBx8cHRkZGuqqHiIgaoXVgZ2Vl\nISYmBosXL+Z4ayKidqD1TMeVK1diw4YNKCkpafSciIgIzdcKhQIKhULb2xERGSSlUgmlUtmic7Wa\n6bh//34cOHAAW7ZsgVKpxPvvv4///Oc/9S/MmY5ERK2m85mOx48fx759++Dp6Ym5c+fiyJEjWLBg\nQZuKJCKiprV5LZG4uDi89957bGETEemA3tcS4SgRIiL942p9REQdCFfrIyIyAAxsIiKJYGATEUkE\nA5uISCIY2EREEsHAJiKSCAY2EZFEMLCJiCSCgU1EJBEMbCIiiWBgExFJBAObiEgiGNhERBLBwCYi\nkggGNhGRRDCwiYgkgoFNRCQRDGwiIolgYBMRSQQDm4hIIhjYREQSwcAmIpIIBjYRkUQwsImIJIKB\nTUQkEVoFdmZmJsaNGwdfX1/4+flh8+bNuq6LiIgeYiQIgtDaD+Xm5iI3NxeBgYEoKyvD4MGDER0d\nDW9v798vbGQELS5NRNSpNZWdWrWwnZ2dERgYCACwtLSEt7c3srOzta+QiIiaZdLWC6Snp+P8+fMY\nNmzYI9+LiIjQfK1QKKBQKNp6OyIig6JUKqFUKlt0rlZdIveVlZVBoVBg1apVmDFjRv0Ls0uEiKjV\ndN4lAgDV1dWYPXs25s2b90hYExGR7mnVwhYEAQsXLoS9vT02btzY8IXZwiYiarWmslOrwD527BjG\njBmDgQMHwsjICAAQGRmJKVOmtOimRETUMJ0HdltvSkREDdNLHzYREbUvBjYRkUQwsImIJIKBTUQk\nEQxsIiKJYGATEUkEA5uISCIY2EREEsHAJiKSCAY2EZFEMLCJiCSCgU1EJBEMbCIiiWBgExFJBAOb\niEgiGNhERBLBwCYikggGNhGRRDCwiYgkgoFNRCQRDGwiIolgYBMRSQQDm4hIIhjYREQSwcAmIpII\nrQM7NjYWAwYMQL9+/fDuu+/qsqYOSalUil2CzhjSswCG9TyG9CyAYT1PR3gWrQK7trYWzz33HGJj\nY3H58mXs3LkTKSkpuq6tQ+kI/2PpiiE9C2BYz2NIzwIY1vN0hGfRKrBPnz6Nvn37olevXjA1NcWc\nOXOwd+9eXddGREQP0Cqwb926BQ8PD82/3d3dcevWLZ0VRUREjzISBEFo7Yd2796N2NhYfPHFFwCA\nb775BqdOncJHH330+4WNjHRXJRFRJ9JYLJtoczE3NzdkZmZq/p2ZmQl3d/cW3ZCIiLSjVZfIkCFD\ncO3aNaSnp6Oqqgr//ve/MX36dF3XRkRED9CqhW1iYoKPP/4YkydPRm1tLf785z/D29tb17UREdED\ntB6HPXXqVKSmpiItLQ2vvfZave8ZyhjtzMxMjBs3Dr6+vvDz88PmzZvFLqnNamtrERQUhJCQELFL\nabPi4mKEhobC29sbPj4+OHnypNgltUlkZCR8fX3h7++PsLAwVFZWil1Si4WHh0Mul8Pf319zrKio\nCBMnToSXlxcmTZqE4uJiEStsnYae56WXXoK3tzcCAgIwa9Ys3L17t93r0vlMR0Mao21qaoqNGzci\nOTkZJ0+exJYtWyT7LPdt2rQJPj4+BvFSePny5Zg2bRpSUlJw8eJFSf+Vl56eji+++AIJCQlISkpC\nbW0tdu3aJXZZLbZo0SLExsbWO7Z+/XpMnDgRV69exfjx47F+/XqRqmu9hp5n0qRJSE5ORmJiIry8\nvBAZGdnudek8sA1pjLazszMCAwMBAJaWlvD29kZ2drbIVWkvKysLMTExWLx4seRfCt+9exfx8fEI\nDw8HUNdNZ21tLXJV2rOysoKpqSlUKhVqamqgUqng5uYmdlktFhwcDFtb23rH9u3bh4ULFwIAFi5c\niOjoaDFK00pDzzNx4kTIZHWROWzYMGRlZbV7XToPbEMdo52eno7z589j2LBhYpeitZUrV2LDhg2a\n/9NJ2c2bN+Ho6IhFixZh0KBBWLJkCVQqldhlac3Ozg4vvPACevToAVdXV9jY2GDChAlil9UmeXl5\nkMvlAAC5XI68vDyRK9KdrVu3Ytq0ae1+X53/5BrCn9oPKysrQ2hoKDZt2gRLS0uxy9HK/v374eTk\nhKCgIMm3rgGgpqYGCQkJWLZsGRISEmBhYSGpP7kfdv36dXz44YdIT09HdnY2ysrKEBUVJXZZOmNk\nZGQw2bB27VqYmZkhLCys3e+t88BuyRhtKamursbs2bMxb948zJgxQ+xytHb8+HHs27cPnp6emDt3\nLo4cOYIFCxaIXZbW3N3d4e7ujqFDhwIAQkNDkZCQIHJV2jt79ixGjhwJe3t7mJiYYNasWTh+/LjY\nZbWJXC5Hbm4uACAnJwdOTk4iV9R2//rXvxATEyPaL1OdB7YhjdEWBAF//vOf4ePjgxUrVohdTpus\nW7cOmZmZuHnzJnbt2oXHH38cO3bsELssrTk7O8PDwwNXr14FABw+fBi+vr4iV6W9AQMG4OTJk6io\nqIAgCDh8+DB8fHzELqtNpk+fju3btwMAtm/fLukGD1A3+m3Dhg3Yu3cvzM3NxSlC0IOYmBjBy8tL\n6NOnj7Bu3Tp93KJdxMfHC0ZGRkJAQIAQGBgoBAYGCgcOHBC7rDZTKpVCSEiI2GW02YULF4QhQ4YI\nAwcOFGbOnCkUFxeLXVKbvPvuu4KPj4/g5+cnLFiwQKiqqhK7pBabM2eO4OLiIpiamgru7u7C1q1b\nhdu3bwvjx48X+vXrJ0ycOFG4c+eO2GW22MPP89VXXwl9+/YVevToocmCpUuXtntdWq0lQkRE7U/6\nwwWIiDoJBjYRkUQwsImIJIKBTUQkEQxsIiKJYGATEUnE/wNGpwjUX/H1bwAAAABJRU5ErkJggg==\n"
}
],
"prompt_number": 32
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"plt.scatter(x = x_n[:,0], y = x_n[:,1], c = colors_m[np.argmax(pa_n_m, axis=1)], s = 50 * (np.max(pa_n_m, axis=1)-.333), marker = 'o', faceted=False, alpha=.6)\n",
"plt.scatter(x = centroids_mu_m[:,0], y = centroids_mu_m[:,1], s = 50, lw = 3, c = colors_m, marker = 'x', faceted=False)\n",
"plt.xlim([0, max(plt.xlim()[1], plt.ylim()[1])]);plt.ylim([0, max(plt.xlim()[1], plt.ylim()[1])]);\n",
"plt.title('Step = {}'.format(step))\n",
"print 'At step {}, {} assignements changed and total variance equals to {:.2f}'.format(step,\n",
" np.sum(np.argmax(pa_n_m, axis=1) != np.argmax(old_pa_n_m, axis=1)),\n",
" np.mean(np.sqrt(dist_n)))\n",
"\n",
"# Performing step\n",
"centroids_mu_m, estimated_prob_m, new_pa_n_m, dist_n_m = em_iteration(x_n, centroids_mu_m, estimated_prob_m, sigma_m)\n",
"pa_n_m, old_pa_n_m = new_pa_n_m, pa_n_m\n",
"step += 1"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"At step 10, 0 assignements changed and total variance equals to inf\n"
]
},
{
"output_type": "display_data",
"png": "iVBORw0KGgoAAAANSUhEUgAAAWwAAAEHCAYAAACKrHwgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XlcU2f2P/BPQgIBwr4vKiqigAoqCtiquKDWqrVqa+3i\nVu2r4+/bqR2ne2fUdtzqtJ3utbZWWm21+6rUWhvUqnVBcUMQZUf2TXaS3N8fjySEBAghIbnJec/L\n15DLzc1JW0+ePPc85xFwHMeBEEKIxROaOwBCCCH6oYRNCCE8QQmbEEJ4ghI2IYTwBCVsQgjhCUrY\nhBDCE5SwidkdO3YM48ePh7u7O7y8vHDnnXfizJkzAIBdu3ZhwoQJZo5Q2zvvvIOYmBhIJBIsX75c\n6/e///47hg0bBmdnZ0yZMgV5eXlmiJJYG0rYxKxqa2sxe/ZsPPnkk6iqqkJhYSHWrVsHBwcHc4fW\npaCgIPzrX//CihUrtH5XXl6OBQsWYOPGjaiqqkJMTAwWLVpkhiiJ1eEIMaPTp09z7u7uOn935coV\nTiKRcHZ2dpxUKuU8PDw4juO4pqYmbu3atVz//v05Pz8/7vHHH+caGxs5juO4P/74gwsKCuI2bdrE\neXt7cyEhIdyePXtMFv9LL73ELVu2TOPY9u3buTvuuEP1uL6+nnN0dOQyMjJMFgexDTTCJmY1dOhQ\n2NnZYdmyZUhOTkZVVZXqd+Hh4fjggw8QHx+PW7duobKyEgDw3HPPISsrC2lpacjKykJhYSFefvll\n1fNKSkpQUVGBoqIiJCUl4bHHHkNmZqbO11+9ejU8PDx0/omOju42fk7HQuHLly8jKipK9djJyQmh\noaG4dOmS3v9cCNGFEjYxKxcXFxw7dgwCgQCrVq2Cr68v7rnnHpSWlgLQTogcx2HHjh14/fXX4e7u\nDqlUiueffx579+7VOO+VV16BWCzGxIkTcffdd+PLL7/U+frvvfceqqqqdP45f/58t/ELBAKtY/X1\n9XB1ddU45urqirq6um6vR0hXROYOgJBhw4bhk08+AQBkZGTg4Ycfxpo1a/D5559rnVtWVoaGhgaM\nGTNGdYzjOCiVStVjDw8PODo6qh4PGDAARUVFJold1whbKpWitrZW41hNTQ1cXFxMEgOxHTTCJhZl\n6NChWLp0qWr6oOMI1tvbG46Ojrhy5YpqJFxdXa2RIKuqqtDQ0KB6nJubi6CgIJ2v9/jjj8PFxUXn\nnxEjRnQbr64RdmRkJNLS0lSP6+vrcf36dURGRnZ7PUK6QgmbmFVGRgZef/11FBYWAgDy8/PxxRdf\nID4+HgDg5+eHgoICtLa2AgCEQiFWrVqFNWvWoKysDABQWFiIgwcPalx33bp1aG1txdGjR/HLL7/g\nvvvu0/n6H3zwAW7duqXzz8WLFzuNW6FQoKmpCXK5HAqFAs3NzVAoFACAe++9F5cuXcK3336LpqYm\nbNiwAdHR0QgLC+vdPyxi8yhhE7NycXHBX3/9hdjYWEilUsTHx2PkyJF47bXXAABTp05FZGQk/P39\n4evrCwDYunUrQkNDERcXBzc3NyQmJmrcVPT394eHhwcCAwPxyCOPYPv27UZPlq+88gqcnJywdetW\n7N69G46Ojti4cSMA9i3gm2++wYsvvghPT0+cOXNGa46dEEMIOF2TcITwlEwmwyOPPIL8/Hxzh0KI\n0dEImxBCeIISNrE6um4EEmINaEqEEEJ4gkbYhBDCEyZbOENfSwkhxDCdTXyYdITNcZzV/Fm3bp3Z\nY6D3Yv3vx5rei7W9n756L12hKRFCCOEJStiEEMITlLD1lJCQYO4QjMaa3gtgXe/Hmt4LYF3vxxLe\ni8nK+gQCQbfzMYQQQjR1lTtphE0IITxBCZsQQniCEjYhhPAEJWxCCOEJStiEEMITlLAJIYQnKGET\nQghPdJmwV6xYAT8/P43NSJ9++mmEh4cjKioK8+fPR01NjcmDJIQQ0k3CXr58OZKTkzWOTZ8+HZcv\nX0ZaWhrCwsKwefNmkwZICCGE6TJhT5gwAR4eHhrHEhMTIRSyp8XGxqKgoMB00RFCCFHp1Rz2zp07\nMWvWLGPFQgghpAsGb2CwceNG2Nvb48EHH+z0nPXr16t+TkhIsIjmKYQQYklkMhlkMple53bb/Ckn\nJwdz5szBxYsXVcd27dqFHTt24Pfff4dEItF9YWr+RIi24mJg/37g5k1gyBBg9mzAycncUREL0lXu\n7PEIOzk5Gdu2bUNKSkqnyZoQm9HQABQUAP7+gKtr1+fW1wPbtgF1dexxXh6Qmws8/bTp4yRWocuE\nvXjxYqSkpKC8vBz9+vXDhg0bsHnzZrS0tCAxMREAEB8fj/fee69PgiXEopw4AXz+OdDSAtjZAffc\nA8yY0fn5Z8+qk3WbrCyW8IODTRsrsQpdJuwvvvhC69iKFStMFgwhvHHrFrBnD9Dayh4rFMC33wJR\nUWy0rYtC0bPjhHRAKx0JMURurjpZt5eV1flzRo8GOk4jBgUB/fsbNzZitShhE2IIf39AINA+HhDQ\n+XPc3IAnnwSGDWM/x8QATzyh+zqE6EBbhBFiqG++AQ4eVD8eNw549FHzxUOsQle5kxI2Ib2RkwPc\nuMGmNoYONXc0xApQwibWRakEvv4aOHYMEAqByZOBuXNpaoFYBdqEl1iX334Dfv8daG4GGhvZQpQ/\n/zR3VISYHCVswj/nz+t3jBArQwmb8E+HDpIAAHf3vo+DkD5GCZvwz8yZmvXMzs7A7ZW3hFgzuulI\n+KmqCjh1ii0JHzeu+z4ehPAEVYkQQghPGLVbHyHEBnAcq8Q5cwZwdASmTQMiI80dlc2jETYhRNv3\n3wMHDqgfC4XAP/7BengTk6I6bEJ6IjcXuHRJd3MnW3HkiOZjpRI4etQ8sRAVmhIhpI1SCWzfrq7p\ndnVlzZpssVe1rpavSmXfx0E00AibkDbnz2suwKmtZT2ubdEdd2gfGz++7+MgGmiETUibmze1jxUW\nAuXlgKcnm8e1FQsWsFr3M2fYnpOJiUBEhLmjsnl005GQNtevA6++qn5cUcH2bOzXjyXslSuBwYPN\nFx+xCXTTkRB9DB4MLFzIytiUSpaw2zYkqKwEPvmElbvpIzcXSEsDmppMFy+xOTTCJqQjuZz1uH7t\nNe3fbdvW9apKjgM++ohNJQBsOuHvfwcGDjRNrADbXzIlhX2ojBwJREeb7rWIydEIm5CeEImAAQPY\nSLs9b2/AxaXr56anq5M1wKZUTHnjsrER2LwZ+Okn1mL2/fc166eJVaGETYguDg7A0qWssRTA9mBc\nurT7TRJKSrSPFRcbP742p0+zqZv2Dh7Uf+qG8ApViRDSmVGjgOHDWUL08WGNprozbBhL6u0TZni4\n6WJsbNQ+1tzM6qhF9Nfb2tAIm5CuiMVsh/TOkjXHsTnvNgEBbCTu5sbKAKOigEWLTBffqFHaiXn0\naErWVopuOhJiqAsXgA0bWDlgUBDw738DsbHsdxzXd6Pc9HQ2h11RwW46ttVQE16i9qqEGFtzM/DQ\nQ8CVK+pjQUHAN99Qb27SKwZXiaxYsQJ+fn4YMWKE6lhlZSUSExMRFhaG6dOno7q62rjREsIHxcXa\nNxhraoCsLPPEQ2xClwl7+fLlSE5O1ji2ZcsWJCYmIjMzE1OnTsWWLVtMGiAhfSotDXjxReC557re\niT0wkN2IbK+iAvj8c/UNR44D/vlPVrtNiBF0OyWSk5ODOXPm4OLFiwCAYcOGISUlBX5+figuLkZC\nQgKuXr2qfWGaEiF8c+sW8Pzz6raqQiGbl25b7djRlSvs/Bs32KKVoiJ2fOFC4MsvgaefVi++efVV\n9piQbhh1x5mSkhL4+fkBAPz8/FCiq+70tvXr16t+TkhIQEJCQk9fjpC+U1OjTtYcx5anV1Z2nrAj\nIoAffgDy8oAJE9THv/5au1HUyZPserbUQIroRSaTQSaT6XVur25hCwQCCLpYSNA+YRNi8fz9WXKW\nyYDSUlaaZ2/Pfnf9OluQYm8PzJsHeHmpn9fczLbQ+u03ID9f+7r33gvs3g189RW7TmAgcP/9bNk6\nsXkdB7MbNmzo9Nwef9y3TYUAwM2bN+Hr69vzCAmxRCIRMHMmK4nr1w8YOhT45RfWF/utt1iv7FOn\ngLff1nxeUBBL4ImJuq+7bx8ruzt8mDWFOnECSErSP670dOCDD4CPP9b9gUBsRo8T9ty5c5F0+z+2\npKQkzJs3z+hBEWI2YjHrIxISwpant7ayapD2Xfdu3tTcPszJie13WFCg+5r/+Q8bWbd344Z+8eTl\nsQ+Lc+fYh8V//8s+QIhN6nJKZPHixUhJSUF5eTn69euHl19+Gc899xzuv/9+fPzxxwgJCcGXX37Z\nV7ESYnpRUWxknZHBRtr33MNG0O7uQFsJa1gYS+xtOI4l0oMHdV/z5ZfZNIqPj7oXSVCQ7nNLS1kb\n17IyNkfu6qq5NVdTE3D5MhAf3/v3SniHFs6YSG1zLZzEThAJaYkw73Ac22VGKlV37CsvZy1MJRJg\n6lTNlYRyOTBpEnD8OHscGgr88Qfw2GPqznmLF7Nzbtxgc9jLlrE58o42bmSj6jYhIUBOjuY5a9ey\nDw1ilYxaJUK6VnSrCDvP7UR+bT6cxE64e8jdmDZomrnDIj0hEGjXWHt7syXfuohEwIwZbKqE44CE\nBDZC/vZbYP58Nr+9a5d+zaM6dvZzcWENqC5dYnHdcQclaxtGCduIOI7D9rPbUVzH/tI1tDbgqytf\nob9bf4R50V8yqzZoEBt5A2zuOzCQjcK/+44ldH2SNcCmY26veQDApkWmTGFTJWIx4OFh/NgJb1DC\nNqLS+lJVsm7vQskFStjWbtEiNoVSWcl2F7+9VgEODj27zsqVLMmXlgKRkcDkyew4VWMRUMI2Kmd7\nZ9gJ7KDgFBrH3Rx0zFUS6yIWsxuLvdXYyObLa2vZ5gndbZhAbAotuzIiqb0UkwZM0jjmIfFAfD+6\no0/0tGcPm68uKAA+/dS0u9UQ3qERtpHdH3k/BnkMwuWyy/By9MKkkEmQ2kvNHRaxBGlpbMWjiwuw\nejW7kdlR++6XSiUbafv7912MxKJRWR8hesjOBo4eZTuAjRtn4EX+9S82Nw2wEr8HH9Q+5+RJtgpS\nqQQGD2YLcmj3GJtCZX3EKty8yRb7OTqyyrm2Nh+mxnFsNXp9PSu1DgwEgoMNuJCXF0vYcrn2juxt\n4uKAgQNZI6pBgyhZEw00h21hyurLkF6WjiZ5U/cn25DSUmDrVmD/frapy9tv9+3G4G2LDTmu+9dt\nbga2b2cdVevq2v1i5Uq2QrGmhq2KPHdO9wX8/FitNSVr0gH9F2EhOI7DF5e+wJHcI+DAQSKS4NFR\nj2Kk30hzh2YRLlzQ3CA8M5NN9/ZFWbJAwKacjxxhUyL9+nV9/qefAm++yX7Oz2/XK0oqZSsXvbzY\nJ8Dly2wT3Y6ystiKyLg42m6MaKARtoW4VHoJKbkp4MCGb03yJnya9inkSnk3z7QNbau4GxvZupLz\n51k3074aZYeFsQHynXd2f25zs/rnhoYOvxw9miVtHx9g4kTtJ1dUAK+/zr5GbN/em5CJFaIRtoW4\nXnVd69itllsoqStBkGsnjYKsXF4ea8lx6pS6PDkzk5U8h4UBv//OejJNn979tbKzgdOn2SB36tSe\nr2fpicceY7FWVQFr1nT4pasr26WmMy0tbLd1QPMrBSGghG0xAl0CtY5JRBJ4OXnpONv6lZayrRCz\ns9nsgFAIjBjBVniPGaNOuPp0Kc3NZc305Le/rFy9yoovTMXeHjB4746AAGDpUuDaNfVSd0Jus5kp\nkVZFq0WXGY4JGIOhXkNVjwUQYN7QeZCIJF08y3qdPs0Gm22tn5VKdq9OJGLVGm0CtT/ntJw/r07W\nAOucaqktpZubga8Kx+Mz4VJUSw0oRUlOZvtQdtbqlfCa1Y+wC2oLsPvCbmRXZ8PL0QsLwhdgTOAY\nc4elxU5oh6finsLlsssoqy9DuE84/KW2u2BCenutkasrm14A2Mg1KooNQpuagJEjgVmzur9Wxy6m\n9vadV9XpQ6FgCxGdnXWvfemN/fuBQ4fYz7dusZudemtqYn1IADYHPnmyZt9uwntWnbCVnBLvnn4X\nlY2VAICKxgp8dO4jBLsGw0/qZ+botAkEAgz3HW7uMCxCXBxw7Bi7qdjYyHJRRATbCjE6umfXmjCB\nzX2fPcsa6K1Y0X0eq6gAPvyQJeZBg9i8tIsLG/X/739sAxmBgMUzZYrh77Oj9hvZtP9ZLw4ObGOE\nwkJWykJlgVbHqlc6Xq+8jlePv6p1/N5h92Jm6EwzRER6Qqlk0xcCAes62lkfJI5jlSP5+exm5JAh\nus9raWGJWp9+Sm+8wea624wdy6pEjh0DPvtMfdzOjpXt6ds9tTv19WwLyMZG4L77DGjS19TE7tYO\nGGDaO6vEZGx2paOTWPeu1J0dJ5ZFKATCw7s/74sv2GYwbRYu1L0fbk9WRratIG9TVsb+v/1uXYDm\nQprWVrbBTHEx+zagTwlgR87O7BuAwSQS2uDAiln1TccAlwCM9NVceOLp6ImxgWPNFBExtlu3WI+P\n9vbv7319dkSE5uO2D45x4zS3Y5w9Wz3zsHs322T97Fk2Cu8YFyG9ZdUjbAB4bMxjkOXIkFGRAX+p\nP6YNmgZHcS/uOBGL0tKiPeptbWUJuzetpBcvZnPWBQWsB9OMGey4RAI89xwrJ3R21lz1eOWK5jWu\nXGHz54QYi9UnbLGdGImDE5E4WMd3ZMJ7Xl5sNNw+WcbHs+mU3hCJOt+PwN6eLVHvyNdXs1yQNokh\nxmbVNx2JbWhqYqVw+fnshuPkyca7CdgT5eXAzp3qOeylS6mqjvRcV7mTEjbhBbkcSE9n0xzh4Z0n\nZKWSJW5XV/PuV1tezuIYPJj6N5GesdkqEWJabf9NmXrbwepqtrS8rVLD3x9Yu1Y7EZaUsBK7sjI2\nJTJhApuL7iy+8nL2nIEDgZ9+An74gXXaa7uJuHMn63/94Yc9m2L54w9WmsdxbIT92GNskQ8hvUUJ\nmxjkxg2WHEUi4Kmn9Fsibqhff1Una4BNORw6BMyfr3nel19qlt+lpLCVkZGR2tf88Ud1NUluLuv8\np1Sykfznn7PEvXIl+71SCXz0kX5Ju7UV+P57zVK/777rOmEXFQEnTrAbmhMmsFWef/zBjg8YwI7R\nXrwE6EXC3rx5M3bv3g2hUIgRI0bgk08+gYONF+oX1Bbgl8xfUFxXjDCvMMwZOsdq93M8dUrdOvTs\nWdMmbF370Oo6lp2tfSwnRzthl5Zqlv4VFakrTb76iv1pLy2NbUSgz9RGczObU2+vpqbz8wsLgS1b\nWLULAPz5J1skdPw4e3zsGOv6d8893b82sX4G3UvPycnBjh07kJqaiosXL0KhUGDv3r3Gjo1Xaptr\n8dqJ15BanIqiuiLIcmV459Q75g7LZEaPVvfkiIrS/n1jIxslHjliwBLrDnRVZOg6NnCg9rGQEO1j\nRUWaddpxccDwTjoCjBzJRt/6zkNLpdofELGxnZ9//Lg6WQNsSXxysuY5p07p99rE+hk0wnZ1dYVY\nLEZDQwPs7OzQ0NCAoCDb7Nnc5kzRGTS0anarz67ORn5NPvq5dbNFCQ+FhbE++wKBdsuK1lY251xQ\nwB6fPs3amRr6tX7qVDai/usv9nj8eLanY0eLFrEpkZISNn2RkMCS56lT7MNDqWTTC1FRLOa2Dn5t\nPUGyszU7AQJsn1xPz57Fu2oVS7p5eeyDZdo0Nko+eZJ9UMTFqa/Z8UtpfT0bzYvF6gZVvWlURayL\nQQnb09MTa9euRf/+/eHo6IgZM2Zg2rRpWuetb9cUOCEhAQm6/pZZCYVSofs4p/u4NeisZK2wUJ2s\nAdZ4qaqKnb9nD3scEgI89BCro+6OSMRK5BYtYsm1s5k3X19gwwbgzBmWuCMjWWvVjz9Wn5OTwypM\nVqwA9u5lddNBQWwRTMdkDbDnrl3bsz5Kjo7AvfeqH5eVAZs3q6//229sDwNfX7bpzPHj7J9Pfj5Q\nWck2+D13jjWdGjiQvW9ivWQyGWQymV7nGlTWd/36dcyZMwdHjx6Fm5sb7rvvPixcuBAPPfSQ+sI2\nVtZX1ViFf8v+jRaF+vttkEsQ/jXxXxDY2B2jqirgxRfVG6dIJGxD2qQkNt/dZsgQ4J//NO5r//QT\n8PPP6sdKpfbNwqFD2YhfqWSj2a+/Vt9g1OW++9iNSEOb333zjXZ76mnT2HUBNud95gzw7rtsdaVA\nwKZJ7OzYtxgqC7QtXeVOg+awz5w5g/Hjx8PLywsikQjz58/H8ba7JDbKw9EDa2LXIMwzDK4OrogJ\niMET457QStZKTokLJRfwZ96fqGmqAcdxaJY3d3JVfvLwAB59lI0gAwKAxx9no+LMTM3zrl0z7p6M\n1dXsZmLH1+i401bbNwOhkCXD7Gx1HCEhbDTffk67oICNfA3VrONfb/tjEgnbTcfVVT1tZG/P4nR2\nNvx1ifUxaMwwbNgwvPLKK2hsbIREIsGhQ4cwbtw4Y8dm8TLKM5BZkQl/qT9GBYzCYM/BWDt+bafn\nN8mb8Nrx15BXmwcAKKsvg6uDKxxEDhjsMRhLo5ZaZJ9uQ4wZw/60N2gQq7hoM3Cg9rx2Swv7IzWg\nuKasTLuvSEAAS45t88BCofbOWy+/zBL2gQNsukKhYPPu770HXLrEVi3++9+sD/eyZSzB9sT48aza\no+0bh1DIjrXn5sZG/hkZ6mMxMeZZsUksl8ErHV999VUkJSVBKBRi9OjR+OijjyBuN6lp7VMiX1/5\nGr/d+E31ONQjFP+I/wfshJ3/Dfs161d8e/VbAKyqJK04DQ4iB4wLHAcIgEBpINYlrDN57OZSVcWm\nRTIy2Eh22TLAr93n0y+/sJt1LS1s/nnlSsCpB51wm5uBZ5/VHFGLxWzXlrNnWTK/4w4gNFT7uW0b\nJTg5semPlBTg5k32rcDXV92tb+pUdoOypzIzgbZpyoQE3R1Q6+vZdE5eHotx1ixqaW2LaGm6kVU1\nVuGFwy9AyWkO5x4b/ViX2499lPoRThedBgDcqLqBwtpCAEBccBzEduzDbkPCBpvcGuzaNVZZ0l5C\nAlup2BOXLrFFLzU1bJT+4INsJO/iol9fj8pK4IUXWAJPT1dvTzZqFLuevz+7sWmI4mL2QdDUxCpF\nhg7t/jnE9tDSdCMrayjTStYAUFynYzVHOyHuIaqELRay7CERSVQ/CwVCm91cIStLv2PdGT6cVWRU\nVLAR665dLFE6OgJz53a/nVd1tXo+u305XXMzS9g97cBXWsoqUyQSVnHStqjmxAm2ZH306J5dj9g2\nStgG6O/WHxKRBE1yzSVtw7x1rOZoZ+KAiUgrTkNmZSb8pH4oqS9BqEcocHseNy4oDq4OtlkS0E9H\nqXr//oZdy84O8PEB3nmH1WQDbLpj3z5WmaLrtdoMGAC4u7PEHRjI5sVbW9kcs6MjMGeO/nH89hur\nEOE4tpTfzo5dH2DHDh2ihE16hhK2ASQiCZZFLUNSWhIa5Y2wE9hh+uDpGOw5uMvn2dvZY+34tciq\nzEJtcy38nP1wouAEyhvKEeETgTv7G7CnVDfkSjmqm6rh6egJocByNxiKjAQmTWK7tCiVrBa5N8ux\nKyrUybq9v/4CLl9mI+axY7WX1NvZsTnv3bvZXPLdd7PVjgEB7KajrpuhVVWspvvqVTYCX7CA1Xa3\n7ymiVLL6dD8/9U3L9isc23Acq8eWSnu+YIdYP5rD7oVmeTPya/Ph6+xrkSPjU4WnsO/yPtS11MFD\n4oFHRj6CSF8dnZAsSFUV61HS24Wzzc3AM89o9vVoaGCJUyplFRs5OWxOOiQEmDlTew9GuVy/2ust\nWzT7mIjFrKzxgw/Ux2prWYVMZKQ6ES9apDlFU1qq/lYgELCNGJYsocZPtsboddiEcRA5INQz1KTJ\nulnejH2X9uH535/H5qObce7mOb2eV9VYhV3nd6GupY49bqrCjtQdWtM4lsbDo/fJGmDVFbNnax6T\ny9V1zVlZrKdIZiab9vjsM3aTsT19knVFhXbTqdZWdrz9HLirK1sSHxPDboIuXqw9n75vn/pbAcex\nFZCpqd3HQGwHTYlYuM8ufKa6UVnZWIkPUz/EP+P/2e30y9Xyq1rL4hvljbheed3iR9nGkpjI5qyv\nXGFTFcePs+kQQF390X4By+nT+u3S3p5EwqZRFB06EHh4sOXvSUlsNaWTEytTHNtu/+fcXBaPlxeb\ny75+Xfv6169r17MT20UJ24I1yZtw9uZZjWNKTonj+ce7TdheTrqbdHR23FqFhKg79tXXqxO2nR2b\nHmk/T2xIkyVnZ9ZQqn0riMBAdYOprVvZVIePj2ZZYUoKq/duc/gw+2bRsTLG0BuvxDrRlIgFE9z+\nX0f63Dwc4jkEkT6aI+n44HibrPFuM2ECm4YQi1mliJcXW30JsJHyxImGXfeBB9hoOjCQVZNER6tH\n7iIRO94+WcvlbHeb9nJyWF12+74hUVGaI3JC6KbjbUpOieSsZBzPPw47oR0mDZiEKQO7KdrVQ4ui\nBQevH0R6WTq8nbwxM3QmAlwC9H5+0vkkHC9Q92mxE9jhmTueQYh7SLfPVSgVSL2ZivzafAzyGIQo\nvyiba0SlS0sLm8JIT2crIKVStnGvfy8+y779lu2M08bHB3jpJd3L2OvqWAfAjubOBaZPZ6NsqbTr\n8kNivWjhjB4OXDuAHzN/VD3ed3kfREIRJg4wcNh120epHyGthDXQyKrKwoXSC1g3aR3cJe56Pf+h\nkQ/Bw9ED54vPw9XBFTMGz+g2WR+6cQgHrx9EQ2sDYgJj8MDwByAR9bABhoVKT2etR93d2YjZxaXn\n17C3Z/8/erRx6qAbG9mURntlZawPt65Ru1TKpmlycjSPDx/ORuI9nUcntoMS9m3tR7GqY/nHe5Ww\nKxoqVMm6TUNrA04WnMTM0Jl6XUMkFGHu0LmYO3SuXuefLTqLr66o97g6UXACHMdh+ajl+gduoY4c\nYf20ATZK/uMP4JVXet6MydgaG3XvqnPrVufPWbGCrXzMzWXz4HPmqBfVENIZSti3GTpX3JX2vbH1\nOd5eXUvg9MEZAAAV9klEQVQdzt08hwkDJqiOVTVW4Wr5VcT3i+/0eWeKzmgfu3kGy6KX8X46JDmZ\n3Si8epWVzQGsRnnTJsN7VRuDpye7OZiXpz4mEOjeOq2Nnx/rWXLrFrvZac74CX/Y7E1HjuNQWFuI\n6qZqAMCkAZO0zuntdEiASwD6uWpORAoFQsQExnT5vLqWOty15y5M+2wa9l9jDZ6rGqswffd0TP10\nKmQ5MtW5NU01kCvlqse6pj4kIkmfJGu5Uq6zx4qxNDWxVYBtyRpgVR+//26yl9TbY4+pO/B5eLBO\nhMHB3T/PxYWSNdGfTf6nUnSrCO+feR+l9aUQCoSIC4rDI1GPQCQU4UTBCdgJ7DApZBLiguN6/Vqr\nx67G3kt7caXsCnycfHDPsHsQ6NL1FuN/++VvOJZ3DABw77578ck9n+CNk2+oRs+zP5+NlGUp+Dnz\nZxTVFcFJ7IQRviPgJHaCUCAEx3EaCTpxUGKv30d3fsr4Cfuz9kMoEGJR5KJef9jpEhfHmia1EQoB\nb2/W6W/GDKO/XI/4+LAbia2tLAHz/MsMsVA2WSWy8chG1SYCbR4Z+YhJenkYIrc6F5OTJiO7Olvn\n77fP3o7cmlyUN7DVH3k1ecitzkWUfxRcHVxhJ7DDSL+RUHJKxATGYFyQaTeXyKnOweZjm1WPhQIh\nNk3ZBA9HD6O+jkLB9kI8fJitZOzfn5XBzZypuYciIXxGS9PbqW+p10rWAJBelq7jbPMY4D4Afyz9\nQ2clyYezP8SsIbNUyVqhVCC/Jh8AWwkJsI1/Q9xDsHrsaowLGodWRatJPzzblr+3UXJK1Lfq2NG2\nl+zs2F6Rd9/NKipcXdlik0TTf4EgxCLY3JSIo9gRUnupVpLxdvI2U0S6uTq4wt7OXut4kGsQXOxd\nIBQIoeSUaFW2quaN7YXq8ysbK1HRUIFP0z7F1YqrcHVwxd1D7kZCSILO1yupK8HV8qvwdvJGhE9E\nj+a8Qz1D4S/1V/UDH+wxGAFS/WvNe8LNDVi3ju1aIxCweeOOm+wSYq1sckokJScFn19Srwt2l7jj\n+Tuf17s22tTabjDqqviwt7PHd4u+Q1VjFY7kHQHASvmUnBKjAkZBJGSfwf9v7P/D/mv7taZV1sav\nRZiX5v5UR3KP4POLn4MD+/cV4R2BJ2Kf6FGVTH1LPU4VnoKd0A5xwXE6P2z45MYNNl8uFLJtxWiJ\nOOkrtEWYDjeqbiCtOA0uDi6IC46D1N6AXV9N5KFvH8LnF9UfKC9NfAl7LuxRJV9HkSNu/P0G8mrz\nkF6WDoFAgKvlV1HRWAGxUIzEQYmYFDIJzx56Vuvak0Mm44HhD6getyha8Mxvz6BRrrm1+ONjHseo\ngFEoulUER5Gj0eejLdmFC8D776s39LWzA/7+d2BY1/tTEGIUtNJRh0EegzDIY5C5w9Dpv4n/RerN\nVFwtv4oPZ3+IVWNWYeWolZicNBm5Nbn4aO5H8Hfxh7+Lv8YNxfKGckjtpardcMRCMVqVmis6XOw1\nlwZWN1VrJWsAyKjIwM+ZP6PgVgEEECAuOA5LopZY9CYIxtJW791GoWDLzilhE3Oz/r99PBTgEoDD\nSw7jiwVfYNWYVQDYHPu6SeuwOmY1BroP1Fnv7O3krarDlogkWrXlUnsp7uh/h9ZzPB21tzbJKM9A\nwa0CAAAHDicKTuBE/gmt86xRba32sa5WLRLSV2x2SoRPmuXN2HRsk8Ymv7FBsVgxaoXO84/mHsXJ\ngpMQCUXwcvJCs7wZHo4emBwyWWd71cyKTOxI3YHa5lrVdmdH845q3ZiND47HsuhlRn1vlujrr9l+\njO3Nnt2z/RwJMRRNifDc6cLTOF98HpWNlZCIJAh0CcSpwlOYO3SuVnXLb9d/w9fpX6sPVACrY1Yj\nyr/zddJhXmHYPHUzCmsL4enoCRcHF1yvvI7MykyN84JcjLAVDA/Mncs66p06pd6q6667zB0VIZSw\neyWvJg8XSy7CXeKOmMAYOIgcTPI6P2b+iGsV18CBg5JTorS+FKMDRqOmqUYrYctyZVrPl+XIukzY\nAGsyNcBd3X1oQcQCvPnXm2hobQAAhLiFaPQ1sWb29mxp+YMPsoTdvpc1IeZECdtAHUsDD944iGfv\neBZOYiejvk5dSx2K64pR11KHupY6cOAgFopR6lKqkWDbtCpaUd1UjfyafLQoWuDt5I3BHl3vTqNL\niHsINk3ZhMtll+EkdkK4dzjvm0f1lD2/KxOJFTL4pmN1dTUWLlyI8PBwRERE4OTJk8aMy6K1Klrx\n3dXvNI4V1xXjaO5Ro79WQ2sDmuXNEAjUu89wHAcOnKrmur1Qz1BcKrmE6qZqNLQ2IK8mDzVNNQa9\ntqPYETGBMT1eSEMIMQ2DR9hPPvkkZs2aha+//hpyuRz19cZfimypaptrdZbCtb8paCy+zr5Qcko4\ni53hJHaCUqmEndAOrg6uKG8o15gSaWhtQFl9GRzFjqhtroWz2BlBrkGoaqpCk7zJajYx6KncXLYL\n+YAB1JSJ8JtBCbumpgZHjx5FUlISu4hIBDc3N6MGZsk8HT3h4+SDsoYyjeNDvYea5PXmDZuH/Np8\n1DbXQiKWoL9bf3g5eWlMv5zIP4EPzn6Aw9mHUddSB6m9FFJ7KbydvMGBs8mKnfp64O23gezbiz37\n92cLYAzZpYYQS2DQlEh2djZ8fHywfPlyjB49GqtWrUJDQ4OxY7NYAoEAy6KXwdWB7ZgqgADxwfEm\n64o3O2w2JgyYgPh+8YgNikWgSyASBiSoEnZ9Sz32XNyDjPIMONixG591LXW41XwL+bX5GOU/Co5i\nA7YE57nkZHWyBtgGA7/8Yr54COktg0bYcrkcqampeOeddzB27FisWbMGW7Zswcsvv6xx3vr161U/\nJyQkICEhoTexWpRQz1BsnroZOdU5cJe4m7R5lJvEDS/c+QJSclNQ2ViJEb4jNDZByK7ORquyFXUt\ndbC3s4eHxEN1g9LXyRdLopaYLDZL1nHPxM6OEWJOMpkMMplMr3MNWjhTXFyM+Ph4ZN8evhw7dgxb\ntmzBzz//rL6wlS2cya3OxZHcI2hRtCA2OBbDfYebOySV8oZyvHT4JaSVpGncYIz0jcR9Effh/sj7\nzRid+Xz1FXDokOaxSZNYuR4hlsro/bD9/f3Rr18/ZGayhRWHDh1CZGSk4RFauGsV17D1z604ln8M\np4pO4e1Tb6t2hLEE3k7emDJwCgZ5DILYjhUNe0g8MNxnOO4Ktd0VH3fdBfRrt0NbUBBbsUgIXxm8\nND0tLQ0rV65ES0sLBg8ejE8++UTjxqM1jbDfP/0+zpec1zjm4+SD/0z5j9FfK7MiEyfyT0AoEOLO\n/ndioMfAHj03ozwDTfImRPpG2mTtdEccx1qlchwweDBViRDLR+1Ve2nbn9uQVZWlccxR5Ij/zfyf\nUV8n9WYqPjz7oaovtVAgxN/H/R3hPuFGfR1CiOWiLcJ6Kdo/WuvYKP9RRn+dA9cOqJI1wLba+vX6\nr0Z/Hb5oaACam80dBSGWw2aWpudU5+DczXOQ2ksRFxwHFwf9i3GnDpqKysZKHMs/BrlSjmi/aJPc\nyLvVot3Ds7ZZR69PG5CeDrzzDuvj8cwzQGDXG80TYhNsImEfzz+OT9M+VY1ef7vxG56941mdrUZ1\nEQqEWDR8ERZELIBCqTBZk6covyit5k26RveWhOM4VDVVwdXBVedSeUNduQLI5exPVhYlbEIAG0jY\nHMfh+6vfa0w11DTX4Pfs33s8ShYJRUZJSmX1ZXC2d9ZqFDU/fD7qW+tx9uZZ1S4vs4bM6vXrmUp2\nVTZ2nt+J0vpSOIudsSB8gdYGCYaaOBG4dg2QSIAxY4xySUJ4z+pvOrYoWvDEgSe0jkf5RWH12NV9\nGsvNWzfx4dkPUVRXBJFQhIQBCVgYsVCrkqNJ3gQBBCYbyRuDklPixcMvorKxUnVMKBBiQ8IG+Dr7\nmjEyQvjNpm862tvZ62wvGu7d95UXO8/tRFFdEQBArpTjUPYhnTujS0QSi07WAFBSV6KRrAGWxK+W\nXzVTRIRYP6tP2ACwJGoJ/KX+AFjfj9igWEwcMLFPY6htrkVebZ7W8Uull/o0DmNxk7hBLNTu7O/j\n5GOGaAixDVY/hw0A/lJ/rJ+0HkW3iuAkdoKHo0efx+AocoSjyFGrLas5YjEGJ7ETZofN1ugLPsJ3\nBIZ509bihJiK1c9hW5Jfs37Ft1e/VT12c3DDCxNegLvE3YxR9U5OdQ4yyjMQ4BKA4b7DIRTYxJc2\nQkyGVjpakPSydKSVpMHVwRV39r9T1aKVEEIAStiEEMIbNl0lQggh1oISNiGE8AQlbEII4QlK2IQQ\nwhM2UYdtCwpqC3Asj3UTjA2KxRCvIeYOiRBiZFQlYgWyKrPwxsk3IFfKAbDVnI+OehRjg8aaOTJC\nSE9RlYiVO3j9oCpZAwAHDgeyDpgxIkKIKVDCtgLtd0pXHWvWPkYI4TdK2FZgpN9I7WO+2scIIfxG\nCdsKzAidgfHB42EnsIMAAkT5ReG+yPvMHRYhxMjopqMVaZI3QaFUwNne2dyhEEIMRL1ECCGEJ6hK\nhBBCrAAlbEII4QlK2IQQwhO9StgKhQKjRo3CnDlzjBUPIYSQTvQqYb/55puIiIiAQCAwVjyEEEI6\nYXDCLigowP79+7Fy5UqqBiGEkD5gcLe+p556Ctu2bUNtbW2n56xfv171c0JCAhISEgx9OUIIsUoy\nmQwymUyvcw2qw/75559x4MABvPvuu5DJZHjttdfw008/aV6Y6rAJIaTHjF6Hffz4cfz4448YOHAg\nFi9ejMOHD2PJkiW9CpIQQkjXer3SMSUlBf/9739phE0IIUZg8pWOVCVCCCGmR71ECCHEglAvEUII\nsQKUsAkhhCcoYRNCCE9QwiaEEJ6ghE0IITxBCZsQQniCEjYhhPAEJWxCCOEJStiEEMITlLAJIYQn\nKGETQghPUMImhBCeoIRNCCE8QQmbEEJ4ghI2IYTwBCVsQgjhCUrYhBDCE5SwCSGEJyhhE0IIT1DC\nJoQQnqCETQghPEEJmxBCeIISNiGE8AQlbEII4QlK2IQQwhMGJez8/HxMnjwZkZGRGD58ON566y1j\nx0UIIaQDAcdxXE+fVFxcjOLiYkRHR6Ourg5jxozB999/j/DwcPWFBQIYcGlCCLFpXeVOg0bY/v7+\niI6OBgBIpVKEh4ejqKjI8AgJIYR0S9TbC+Tk5ODcuXOIjY3V+t369etVPyckJCAhIaG3L0cIIVZF\nJpNBJpPpda5BUyJt6urqkJCQgJdeegnz5s3TvDBNiRBCSI8ZfUoEAFpbW7FgwQI8/PDDWsmaEEKI\n8Rk0wuY4DkuXLoWXlxfeeOMN3RemETYhhPRYV7nToIR97NgxTJw4ESNHjoRAIAAAbN68GTNnztTr\nRQkhhOhm9ITd2xclhBCim0nmsAkhhPQtStiEEMITlLAJIYQnKGETQghPUMImhBCeoIRNCCE8QQmb\nEEJ4ghI2IYTwBCVsQgjhCUrYhBDCE5SwCSGEJyhhE0IIT1DCJoQQnqCETQghPEEJmxBCeIISNiGE\n8AQlbEII4QlK2IQQwhOUsAkhhCcoYRNCCE9QwiaEEJ6ghE0IITxBCZsQQniCEjYhhPAEJWxCCOEJ\ngxN2cnIyhg0bhiFDhmDr1q3GjMkiyWQyc4dgNNb0XgDrej/W9F4A63o/lvBeDErYCoUC//d//4fk\n5GRcuXIFX3zxBdLT040dm0WxhH9ZxmJN7wWwrvdjTe8FsK73YwnvxaCEferUKYSGhiIkJARisRgP\nPPAAfvjhB2PHRgghpB2DEnZhYSH69eunehwcHIzCwkKjBUUIIUSbgOM4rqdP+uabb5CcnIwdO3YA\nAHbv3o2//voLb7/9tvrCAoHxoiSEEBvSWVoWGXKxoKAg5Ofnqx7n5+cjODhYrxckhBBiGIOmRGJi\nYnDt2jXk5OSgpaUF+/btw9y5c40dGyGEkHYMGmGLRCK88847mDFjBhQKBR599FGEh4cbOzZCCCHt\nGFyHfddddyEjIwNZWVl4/vnnNX5nLTXa+fn5mDx5MiIjIzF8+HC89dZb5g6p1xQKBUaNGoU5c+aY\nO5Req66uxsKFCxEeHo6IiAicPHnS3CH1yubNmxEZGYkRI0bgwQcfRHNzs7lD0tuKFSvg5+eHESNG\nqI5VVlYiMTERYWFhmD59Oqqrq80YYc/oej9PP/00wsPDERUVhfnz56OmpqbP4zL6SkdrqtEWi8V4\n4403cPnyZZw8eRLvvvsub99LmzfffBMRERFWcVP4ySefxKxZs5Ceno4LFy7w+lteTk4OduzYgdTU\nVFy8eBEKhQJ79+41d1h6W758OZKTkzWObdmyBYmJicjMzMTUqVOxZcsWM0XXc7rez/Tp03H58mWk\npaUhLCwMmzdv7vO4jJ6wralG29/fH9HR0QAAqVSK8PBwFBUVmTkqwxUUFGD//v1YuXIl728K19TU\n4OjRo1ixYgUANk3n5uZm5qgM5+rqCrFYjIaGBsjlcjQ0NCAoKMjcYeltwoQJ8PDw0Dj2448/YunS\npQCApUuX4vvvvzdHaAbR9X4SExMhFLKUGRsbi4KCgj6Py+gJ21prtHNycnDu3DnExsaaOxSDPfXU\nU9i2bZvqPzo+y87Oho+PD5YvX47Ro0dj1apVaGhoMHdYBvP09MTatWvRv39/BAYGwt3dHdOmTTN3\nWL1SUlICPz8/AICfnx9KSkrMHJHx7Ny5E7Nmzerz1zX631xr+KrdUV1dHRYuXIg333wTUqnU3OEY\n5Oeff4avry9GjRrF+9E1AMjlcqSmpmL16tVITU2Fs7Mzr75yd3T9+nX873//Q05ODoqKilBXV4c9\ne/aYOyyjEQgEVpMbNm7cCHt7ezz44IN9/tpGT9j61GjzSWtrKxYsWICHH34Y8+bNM3c4Bjt+/Dh+\n/PFHDBw4EIsXL8bhw4exZMkSc4dlsODgYAQHB2Ps2LEAgIULFyI1NdXMURnuzJkzGD9+PLy8vCAS\niTB//nwcP37c3GH1ip+fH4qLiwEAN2/ehK+vr5kj6r1du3Zh//79ZvswNXrCtqYabY7j8OijjyIi\nIgJr1qwxdzi9smnTJuTn5yM7Oxt79+7FlClT8Omnn5o7LIP5+/ujX79+yMzMBAAcOnQIkZGRZo7K\ncMOGDcPJkyfR2NgIjuNw6NAhREREmDusXpk7dy6SkpIAAElJSbwe8ACs+m3btm344YcfIJFIzBME\nZwL79+/nwsLCuMGDB3ObNm0yxUv0iaNHj3ICgYCLiorioqOjuejoaO7AgQPmDqvXZDIZN2fOHHOH\n0Wvnz5/nYmJiuJEjR3L33nsvV11dbe6QemXr1q1cREQEN3z4cG7JkiVcS0uLuUPS2wMPPMAFBARw\nYrGYCw4O5nbu3MlVVFRwU6dO5YYMGcIlJiZyVVVV5g5Tbx3fz8cff8yFhoZy/fv3V+WCv/3tb30e\nl0G9RAghhPQ9/pcLEEKIjaCETQghPEEJmxBCeIISNiGE8AQlbEII4QlK2IQQwhP/Hz/+c87RR8m6\nAAAAAElFTkSuQmCC\n"
}
],
"prompt_number": 43
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"error_and_permutation = find_minimal_error_and_permutation(real_a_n=assignements_n, estimated_a_n=np.argmax(pa_n_m, axis=1), m=m)\n",
"print 'Minimum error of {error}% with re-ordering {permutation} of clusters'.format(**error_and_permutation) \n",
"print 'The estimated probabilities of each cluster are : {} VS the real ones : {}'\\\n",
" .format(estimated_prob_m[error_and_permutation['permutation']], prob_m)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Minimum error of 16.0% with re-ordering [1 2 0] of clusters\n",
"The estimated probabilities of each cluster are : [ 0.34255195 0.42465982 0.23278822] VS the real ones : [ 0.27417081 0.41692802 0.30890117]\n"
]
}
],
"prompt_number": 44
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"Comparison to SKlearn implementations"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def my_kmeans(m, x_n):\n",
" n = int(x_n.shape[0])\n",
" step = 0\n",
" centroids_mu_m = x_n[np.random.choice(n, size = m, replace=False),:]\n",
" old_estimated_a_n = estimated_a_n = np.repeat(m, n)\n",
" dist_n = np.repeat(np.infty, n)\n",
" \n",
" while step == 0 or not np.all(centroids_a_n == centroids_a_old_n):\n",
" centroids_mu_m, new_estimated_a_n, dist_n = kmeans_iteration(x_n, centroids_mu_m)\n",
" estimated_a_n, old_estimated_a_n = new_estimated_a_n, estimated_a_n\n",
" step += 1\n",
" return centroids_a_n\n",
"\n",
"def my_em(m, x_n):\n",
" n = int(x_n.shape[0])\n",
" step = 0\n",
" centroids_mu_m = x_n[np.random.choice(n, size = m, replace=False),:]\n",
" estimated_prob_m = np.repeat(1.0/m, m)\n",
" old_pa_n_m = pa_n_m = np.repeat(1.0/3.0, n * m).reshape(n,m)\n",
" sigma_m = np.repeat(1, m)\n",
" dist_n = np.repeat(np.infty, n)\n",
" \n",
" while step == 0 or not np.all(pa_n_m == old_pa_n_m):\n",
" centroids_mu_m, estimated_prob_m, new_pa_n_m, dist_n_m = em_iteration(x_n, centroids_mu_m, estimated_prob_m, sigma_m)\n",
" pa_n_m, old_pa_n_m = new_pa_n_m, pa_n_m \n",
" step += 1\n",
" return centroids_a_n"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 45
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%timeit my_kmeans(m, x_n)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1000 loops, best of 3: 275 us per loop\n"
]
}
],
"prompt_number": 46
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from sklearn.cluster import KMeans\n",
"km = KMeans(n_clusters=m, init='random', precompute_distances=False, n_init=1)#, n_jobs=1)\n",
"\n",
"%timeit km.fit(X=x_n)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1000 loops, best of 3: 597 us per loop\n"
]
}
],
"prompt_number": 48
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%timeit my_em(m, x_n)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-49-dc4bb81ea95f>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mget_ipython\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmagic\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34mu'timeit my_em(m, x_n)'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32m/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.pyc\u001b[0m in \u001b[0;36mmagic\u001b[1;34m(self, arg_s)\u001b[0m\n\u001b[0;32m 2134\u001b[0m \u001b[0mmagic_name\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmagic_arg_s\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0marg_s\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpartition\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m' '\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2135\u001b[0m \u001b[0mmagic_name\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmagic_name\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlstrip\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mprefilter\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mESC_MAGIC\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2136\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrun_line_magic\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmagic_name\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmagic_arg_s\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2137\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2138\u001b[0m \u001b[1;31m#-------------------------------------------------------------------------\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.pyc\u001b[0m in \u001b[0;36mrun_line_magic\u001b[1;34m(self, magic_name, line)\u001b[0m\n\u001b[0;32m 2060\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_getframe\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstack_depth\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mf_locals\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2061\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbuiltin_trap\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2062\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2063\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2064\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/usr/local/lib/python2.7/dist-packages/IPython/core/magics/execution.pyc\u001b[0m in \u001b[0;36mtimeit\u001b[1;34m(self, line, cell)\u001b[0m\n",
"\u001b[1;32m/usr/local/lib/python2.7/dist-packages/IPython/core/magic.pyc\u001b[0m in \u001b[0;36m<lambda>\u001b[1;34m(f, *a, **k)\u001b[0m\n\u001b[0;32m 189\u001b[0m \u001b[1;31m# but it's overkill for just that one bit of state.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 190\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mmagic_deco\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marg\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 191\u001b[1;33m \u001b[0mcall\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mlambda\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mk\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mk\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 192\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 193\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mcallable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marg\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/usr/local/lib/python2.7/dist-packages/IPython/core/magics/execution.pyc\u001b[0m in \u001b[0;36mtimeit\u001b[1;34m(self, line, cell)\u001b[0m\n\u001b[0;32m 804\u001b[0m \u001b[0mnumber\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 805\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m10\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 806\u001b[1;33m \u001b[1;32mif\u001b[0m \u001b[0mtimer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtimeit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnumber\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m>=\u001b[0m \u001b[1;36m0.2\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 807\u001b[0m \u001b[1;32mbreak\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 808\u001b[0m \u001b[0mnumber\u001b[0m \u001b[1;33m*=\u001b[0m \u001b[1;36m10\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/usr/lib/python2.7/timeit.pyc\u001b[0m in \u001b[0;36mtimeit\u001b[1;34m(self, number)\u001b[0m\n\u001b[0;32m 193\u001b[0m \u001b[0mgc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdisable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 194\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 195\u001b[1;33m \u001b[0mtiming\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minner\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mit\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtimer\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 196\u001b[0m \u001b[1;32mfinally\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 197\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mgcold\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m<magic-timeit>\u001b[0m in \u001b[0;36minner\u001b[1;34m(_it, _timer)\u001b[0m\n",
"\u001b[1;32m<ipython-input-45-88ae020493d2>\u001b[0m in \u001b[0;36mmy_em\u001b[1;34m(m, x_n)\u001b[0m\n\u001b[0;32m 21\u001b[0m \u001b[0mdist_n\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrepeat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minfty\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mn\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 22\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 23\u001b[1;33m \u001b[1;32mwhile\u001b[0m \u001b[0mstep\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;36m0\u001b[0m \u001b[1;32mor\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mall\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpa_n_m\u001b[0m \u001b[1;33m==\u001b[0m \u001b[0mold_pa_n_m\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 24\u001b[0m \u001b[0mcentroids_mu_m\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mestimated_prob_m\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnew_pa_n_m\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdist_n_m\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mem_iteration\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx_n\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcentroids_mu_m\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mestimated_prob_m\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msigma_m\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 25\u001b[0m \u001b[0mpa_n_m\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mold_pa_n_m\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnew_pa_n_m\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpa_n_m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/usr/local/lib/python2.7/dist-packages/numpy/core/fromnumeric.pyc\u001b[0m in \u001b[0;36mall\u001b[1;34m(a, axis, out, keepdims)\u001b[0m\n\u001b[0;32m 1707\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1708\u001b[0m \"\"\"\n\u001b[1;32m-> 1709\u001b[1;33m \u001b[0marr\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0masanyarray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1710\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1711\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/usr/local/lib/python2.7/dist-packages/numpy/core/numeric.pyc\u001b[0m in \u001b[0;36masanyarray\u001b[1;34m(a, dtype, order)\u001b[0m\n\u001b[0;32m 370\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 371\u001b[0m \"\"\"\n\u001b[1;32m--> 372\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mFalse\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0morder\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0morder\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msubok\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 373\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 374\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mascontiguousarray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"prompt_number": 49
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from sklearn.mixture import GMM\n",
"gmm = GMM(n_components=m, covariance_type='diag', random_state=None, thresh=0.01, min_covar=0.001)\n",
"fitted_model = gmm.fit(X=x_n)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 51
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fitted_model.means_\n",
"fitted_model.weights_"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 54,
"text": [
"array([ 0.30678353, 0.34441477, 0.3488017 ])"
]
}
],
"prompt_number": 54
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment