Skip to content

Instantly share code, notes, and snippets.

@znd4
Created July 1, 2018 13:24
Show Gist options
  • Save znd4/b58a29cac65147c1521d517312d2c277 to your computer and use it in GitHub Desktop.
Save znd4/b58a29cac65147c1521d517312d2c277 to your computer and use it in GitHub Desktop.
kmedoids visualization example
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import random\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.cluster import KMedoids, AgglomerativeClustering\n",
"from sklearn.metrics import pairwise_distances\n",
"plt.style.use('dark_background')\n",
"np.random.seed(42)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Constants"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"# Size of the synthetic data set\n",
"n_movies = 10_000        # number of movie columns\n",
"n_classes = 8            # number of viewer classes\n",
"pop_per_class = 200      # viewers generated for each class\n",
"baseline = 0.005         # added to every per-movie viewing probability\n",
"\n",
"# Gaussian generator: height and width of each class's probability bump\n",
"max_gauss_prob = 0.1\n",
"sigma = 500\n",
"\n",
"# Step-function generator: extra probability on a class's popular movies,\n",
"# and how many movies are marked popular per class\n",
"in_class_prob = 0.08\n",
"n_popular_per_class = 200\n",
"\n",
"# One population size per class (all classes equally large)\n",
"populations = [pop_per_class for _ in range(n_classes)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Gaussian probability distributions"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [],
"source": [
"def gauss(x, mu, sig, max_val=0.1):\n",
"    \"\"\"Evaluate an unnormalised Gaussian bump.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    x : scalar or array-like\n",
"        Point(s) at which to evaluate the curve.\n",
"    mu : scalar\n",
"        Centre of the bump.\n",
"    sig : scalar\n",
"        Width (standard deviation) of the bump.\n",
"    max_val : float, optional\n",
"        Height of the curve at ``x == mu``.\n",
"\n",
"    Returns\n",
"    -------\n",
"    ``max_val * exp(-(x - mu)**2 / (2 * sig**2))``, with the shape of ``x``.\n",
"    \"\"\"\n",
"    # np.exp is the idiomatic (and more accurate) spelling of np.e ** (...)\n",
"    return max_val * np.exp(-((x - mu) ** 2) / (2 * sig ** 2))\n",
"\n",
"# Class centres: the n_classes interior points of an even grid on [0, n_movies]\n",
"means = np.linspace(0, n_movies, n_classes+2)[1:-1]\n",
"r = np.arange(n_movies)\n",
"# One per-movie probability vector per class. Stored as a list rather than a\n",
"# one-shot generator so the cell that builds `movie_views` can be re-run\n",
"# without first re-running this cell.\n",
"chances = [\n",
"    baseline + gauss(r, mu, sigma, max_val=max_gauss_prob)\n",
"    for mu in means\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
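"## Step-function probability distributions\n",
"\n",
"Alternative to the Gaussian profiles above. The next two cells are stored as raw cells, so they are not executed; convert them to code cells to generate `chances` this way instead."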
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"r = np.arange(n_movies)\n",
"# For every class, draw n_popular_per_class distinct movie indices.\n",
"# Built eagerly as a list (not a one-shot generator) so the result survives\n",
"# re-running the cells that consume it.\n",
"likely_movies_list = [\n",
"    np.random.choice(r, size=n_popular_per_class, replace=False)\n",
"    for _ in range(n_classes)\n",
"]"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"def zero_except(size, indices):\n",
"    \"\"\"Return a float array of zeros of shape ``size`` with ones at ``indices``.\"\"\"\n",
"    indicator = np.zeros(size)\n",
"    indicator[indices] = 1\n",
"    return indicator\n",
"# Per-class probability vectors: baseline everywhere, plus in_class_prob on\n",
"# that class's popular movies. A list (not a one-shot generator) so the\n",
"# downstream cells can be re-run.\n",
"chances = [\n",
"    baseline + in_class_prob * zero_except(n_movies, likely_movies)\n",
"    for likely_movies in likely_movies_list\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Generate `movie_views` array"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"# Both sequences are materialised as lists: as generators they were exhausted\n",
"# by the first concatenation, so re-running the next cell on its own failed.\n",
"# Each entry of movie_views is a boolean (population, n_movies) matrix whose\n",
"# cells are True with the per-movie probability given in prob_array.\n",
"movie_views = [\n",
"    np.random.random((population, n_movies)) < prob_array\n",
"    for prob_array, population in zip(chances, populations)\n",
"]\n",
"# zip() stops silently if `chances` is short or is an already-consumed\n",
"# generator; fail loudly here instead of later inside np.concatenate.\n",
"assert len(movie_views) == len(populations), (\n",
"    'chances yielded fewer entries than populations; re-run the cell that defines it'\n",
")\n",
"# Matching (population, 1) integer column holding each row's class index.\n",
"class_labels = [\n",
"    np.full((population, 1), class_label, dtype=int)\n",
"    for class_label, population in enumerate(populations)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"# Stack the per-class blocks on top of each other (row-wise).\n",
"views_array = np.vstack(list(movie_views))\n",
"labels_array = np.vstack(list(class_labels))\n",
"\n",
"# Views with the class index appended as the last column.\n",
"combined_array = np.hstack((views_array, labels_array))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# KMedoids"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
"# K-medoids on Jaccard distance with one cluster per generated class.\n",
"# Commented-out alternatives for init were 'k-medoids++' and 'random'.\n",
"# NOTE(review): KMedoids is imported from sklearn.cluster at the top of this\n",
"# notebook - confirm the installed scikit-learn actually provides it there.\n",
"kmedoids = KMedoids(\n",
"    init='heuristic',\n",
"    metric='jaccard',\n",
"    n_clusters=n_classes,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"KMedoids(init='heuristic', max_iter=300, metric='jaccard', n_clusters=8,\n",
" random_state=None)"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fit on the boolean viewer-by-movie matrix; the returned estimator is echoed\n",
"# as the cell output and kmedoids.labels_ is plotted in the next cell.\n",
"kmedoids.fit(views_array)"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEPCAYAAACqZsSmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAGz5JREFUeJzt3XtUVXX+//HX4eoVZKzUUkFxvHQhsxG8TEkXU0w0J8YJzKFWM2NjNpPSzRydll0nnPqq37IlNVlTMX6ndNKSsAuMZpao0WhGmF/wNsiYP4SDXAT5/P4I+UqW7IOwN7ifj7Xea8V2Hz4vCF9u9j5nH48kIwCAq/g5HQAAYD/KHwBciPIHABei/AHAhSh/AHAhyh8AXMi28k9KSpLX6200xhjNnTvXrggAgFMYJ2b69OkmLy/PhISEOLI+wzCMm8dT/x+26tmzp7788kuNHz9en376qd3LA4DrOVL+aWlp8ng8+tWvfvWD+yRG3q0Kb6WNqZrHExTkdARL6o5VOB3BEu/PhjkdoUldV213OoIlfh07OB3BkkNpvZ2OYEmP5AKnI1iyrUOmDhw40OR+tpd/jx49tGfPHl166aUqLCz83n169+6t/fv32xkLAM4Zffr0afIfgACbsjRITExUdnb2Dxa/JJWVlUmSkgb8rs0f/XPk37I48m85HPm3rPZw5N+pa0e9/r9LGzr0TGwv//j4eK1YscLSvhXeSlWUtfXyP+F0BEvqjrXt7+NJxyqPOx2hSf5t/GfyJL8a28/oNkt5bbXTESzp2k7+v1tl6/P8PR6Phg8frs2bN9u5LADgO2wt/+7du6tr164qKiqyc1kAwHfYetrnm2++kcfjsXNJAMD34PYOAOBClD8AuBDlDwAuRPkDgAtR/gDgQpQ/ALgQ5Q8ALkT5A4ALUf4A4EKUPwC4EOUPAC5E+QOAC1H+AOBClD8AuBDlDwAuRPkDgAtR/gDgQpQ/ALgQ5Q8ALkT5A4AL2foG7r4yNbUyNbVOxzgjU13tdIRzSnHccacjNCnkb3VOR7Ck7tgxpyNYUvVFN6cjWGJq2v7PpqmxXum2HvmHh4crIyNDZWVlKigo0LRp0+xcHgBQz9byf+utt7Rt2zaFhYVp6tSpev755xUREWFnBACAbDztM3LkSIWGhmr+/PkyxignJ0cxMTH65ptv7IoAAKhn25H/FVdcoS+++EKLFy/WoUOHtGPHDvXv31/l5eV2RQAA1LOt/MPCwjRu3Djt3r1bffr0UUpKitLT0zVgwAC7IgAA6tlW/tXV1dq3b5+WLl2qmpoarV+/Xhs3btS4cePsigAAqGdb+efn5ys0NLTRNn9/f3k8HrsiAADq2Vb+7733nqqrq7Vw4UL5+flp/PjxGjVqlNasWWNXBABAPdue7VNZWalrrrlGzz77rI4cOaLi4mIlJSVp3759dkUAANSz9RW++fn5Gjt2rJ1LAgC+B/f2AQAXovwBwIUofwBwIcofAFyI8gcAF6L8AcCFKH8AcCHKHwBciPIHABei/AHAhSh/AHAhyh8AXIjyBwAXovwBwIUofwBwIcofAFyI8gcAF7L1nbx8tee/Lpe35rjTMc5o4K9znI5wTvnxL7c7HaFJexaNcDqCJZH3fuJ0BEsi5m9xOoIrceQPAC5E+QOAC1H+AOBCtpZ/SkqKqqur5fV6G6ZXr152RgAAyObyHzp0qFJSUtS1a9eGKSoqsjMCAEAOlP/nn39u55IAgO9hW/kHBwdr0KBBuv/++1VcXKzc3FxNmDDBruUBAKewrfwvuOACffzxx1q6dKl69+6tefPmaeXKlRo8eLBdEQAA9Wx7kdf+/fsVGxvb8PE777yjrKwsxcXFKS8vz64YAADZeOQfFRWl++67r9G24OBgVVVV2RUBAFDPtvIvKyvTH//4R8XHx8vj8SghIUEjRozQ6tWr7YoAAKhn22mfwsJCJSYm6oknnlB6er
p2796tSZMm6dChQ3ZFAADUs/XGbmvXrtXatWvtXBIA8D24vQMAuBDlDwAuRPkDgAtR/gDgQpQ/ALgQ5Q8ALkT5A4ALUf4A4EKUPwC4EOUPAC5E+QOAC1H+AOBClD8AuJCtd/X01cA/fK2KskqnY5zRCacDWJT571ynI1gy7sKhTkdoUuT9OU5HOLfUtZe/RecWjvwBwIUofwBwIcofAFyI8gcAF6L8AcCFml3+gYGBuvLKK9W1a9eWzAMAsIHl8o+MjFR2drZiYmLUsWNHbdmyRVu2bNHevXsVExPTmhkBAC3McvkvXbpUXq9XhYWFmj59unr37q1BgwZp2bJlevrppy0vOGTIEFVWVioyMrJZgQEAZ89y+V911VWaPXu2iouLddNNN+mdd97R119/rbS0NA0dau2FOf7+/nrppZfUoUOHZgcGAJw9y+VfVVWlwMBAderUSWPGjFFGRoYkqWfPniotLbX0OebOnauPPvqoeUkBAC3G8u0dMjMzlZaWJq/Xq4qKCq1du1bXXnutFi9erDVr1jT5+KioKP3iF7/Q8OHDlZKSclahAQBnx/KR/4wZM7R161ZVVVXpxhtvVEVFhYYPH67s7Gzdc889Z3xsYGCgXnrpJc2YMUNVVVVnHRoAcHYsH/kfO3bstJL/05/+ZOmxCxYsUHZ2tj7++GPf0gEAWoVPz/P/+c9/rk8//VQlJSXq16+fUlNTLZ3CSUhI0B133KGSkhKVlJRIkrZv367ExMTmpQYAnBXL5Z+cnKznnntOq1evVlBQkCQpLy9PCxYs0AMPPHDGxw4ZMkTdunVTWFiYwsLCJEnDhg1Tenr6WUQHADSX5fJPSUnRb3/7Wz355JM6ceLb+2+/+OKLuv322zVjxoxWCwgAaHmWz/lHRkZq69atp23Pzc1Vz549fVrU4/H4tD8AoGVZPvL/6quvdP3115+2ferUqcrLy2vRUACA1mX5yP+hhx7SG2+8oZ/85CcKCAjQnXfeqQEDBmjixIlKSEhozYwAgBZm+cj/3XffVXR0tIKDg7Vz506NHTtWVVVVGjFihNauXduaGQEALcynN3DftWuXbr/99oaPu3fvriNHjrR4KABA67J85H/eeedpxYoVioqKkp+fnzIyMlRcXKzdu3dr0KBBrZkRANDCLJf/s88+q8GDB8vr9SoxMVGjR4/W+PHjtXHjRi1ZsqQ1MwIAWpjl0z5jx47V1VdfrYKCAi1atEgZGRl6//33VVBQoNzc3NbMCABoYZaP/D0ejyoqKhQQEKDrrruu4ZbOXbp0UWVlZasFBAC0PMtH/hs2bFBqaqqOHj2qwMBAvfXWW7rsssu0ePFiffjhh62ZEQDQwiyX/4wZM/T8889r2LBhmjp1qkpKSvTggw/K6/Xq7rvvbpVwxhgZY1rlc7cU//p7FbV1DxRbe7c1p3mCg52O0CS/dpBRUpv/u9PetIc7E/iHdLS8r+XyP3TokG666aZG25q6oRsAoG2yfM6/Y8eOevjhhzVgwABJ0vPPPy+v16sPPvjA53v7AACcZbn8Fy9erFtvvVVBQUGaNGmSkpOTNXv2bFVWVvJUTwBoZyyf9pk8ebImTpyoXbt2ae7cuXrvvff0wgsvaNOmTdq8eXNrZgQAtDCfTvsUFxfL4/Fo3LhxevfddyV9e1Hp5P39AQDtg+Uj/5ycHD3wwAM6fPiwwsLCtHr1avXq1UuPPPKIPvnkk9bMCABoYZbLf9asWXr99dcVERGhmTNnqqioSEuWLNGgQYM0ZcqU1swIAGhhlsv/yy+/1BVXXNFo20MPPaTy8vIWDwUAaF0+3dJ56NChuvjii+Xv7y/p2xc9BAcH68orr9Sdd97ZKgEBAC3Pp3fyeuSRR1ReXq7OnTurtLRUoaGhkqR169a1WkAAQMuz/GyfGTNm6L777lNoaKiKiooUFRWliy66SJ988olycnJaMyMAoIVZLv+ePXvqzTfflCTl5uZq5MiRKi4u1v3336/p06db+h
xJSUnKz8+X1+vVli1bNGrUqOalBgCcFcvlf/jwYXXv3l2SlJ+fr8svv1ySdPDgQV144YVNPn7gwIFatmyZbrnlFnXt2lVpaWl64403mhkbAHA2LJf/W2+9peXLl+vyyy9XVlaWfvnLX+rqq6/WnDlztHfv3iYfn5+frwsvvFDbt29XUFCQwsLCeP9fAHCI5Qu+9957r55++mldeumleu2113TzzTfrgw8+0LFjx5SYmGjpcxw7dkxDhw7V1q1bVVtbq/j4+GYHBwA0n0dSs2/63blzZ1VVVfl0e4eAgG//vbn11lu1ePFiDRgwQIcPH260T0hIiEpLSzWlz12qKGvb7xLmCfDp2bKOuezDEqcjWPL5iLZ/r3zu5+9O7eF+/p1COmrVvv9WaGioysrKzrjvGZsrLi7O8qIn39axKbW1tZKkFStWaM6cOYqNjdXf//53y+sAAM7eGcv/7bffbvTxySOJk/8CGmPk8XhkjGk4ov8hEyZM0MyZMzVx4sSGbUFBQSotLW1WcABA853xgq+/v3/DBAUF6fHHH9ddd93VsG3r1q16+OGHFRQU1ORC27Zt0+jRo3XzzTfL399fd911lwIDA7Vhw4YW+2IAANZYfrZPamqqpk2bpj179jRse/bZZ5WcnKyFCxc2+fji4mJNmTJF8+fP15EjRzRlyhTFxcWpqqqqeckBAM1m+YJvUVGRpkyZctrtm0ePHq2VK1eqd+/eLRaKC74tjwu+LYcLvu50rl3w9enNXL7vDp4lJSUN9/gBALQPlss/KytLTz31lMLCwhq2hYaG6rHHHlN2dnZrZAMAtBLL5yx+97vf6f3339fBgwe1d+9eGWMUHh6ur7/+WpMnT27NjACAFma5/Pfv369LL71UY8eO1ZAhQ3T8+HHl5+dr/fr1nFsEgHbGp6uVNTU1WrduHffvB4B2rk0/VaXuWIXqjrXtZ/v857cxTkewxIzc7nQES0xNjdMRmlRX1z5+0z1y65VOR7DkvP/5l9MRLPl/N13mdIQmVXds+jVXJ1m+4AsAOHdQ/gDgQpQ/ALgQ5Q8ALkT5A4ALUf4A4EKUPwC4EOUPAC5E+QOAC1H+AOBClD8AuBDlDwAuRPkDgAtR/gDgQpQ/ALgQ5Q8ALmRr+U+aNEk7d+5UaWmpcnJyNHr0aDuXBwDUs638+/Xrp1deeUUzZ85Ut27d9Mwzz2jNmjUKCQmxKwIAoJ5t5R8eHq60tDRt2LBBxhi9/vrrqqur0+DBg+2KAACoZ9t7+GZnZys7O7vh45iYGHXp0kX5+fl2RQAA1HPkgm9kZKTefPNNzZ8/X0ePHnUiAgC4mu3lHx0drU2bNmn58uVatGiR3csDAGTjaR9JiouLU3p6ulJSUvTiiy/auTQA4BS2lX/fvn21cuVK3XbbbVq1apVdywIAvodtp31mz56tzp076+WXX5bX622YMWPG2BUBAFDPtiP/2bNna/bs2XYtBwA4A27vAAAuZOsFX5/V1Ul1J5xOcUZV5zmdwJq6qiqnI5wzTBv/mTzpRLDTCaypO3bM6QiW+NU6naBpvmTkyB8AXIjyBwAXovwBwIUofwBwIcofAFyI8gcAF6L8AcCFKH8AcCHKHwBciPIHABei/AHAhSh/AHAhyh8AXIjyBwAXovwBwIUofwBwIcofAFyI8gcAF6L8AcCFHCn/hIQEbdiwwYmlAQCyufz9/Pw0Z84cvfrqq/J4PHYuDQA4ha3l/+STT2ry5Ml68skn7VwWAPAdtpb/008/rTFjxqigoMDOZQEA32Fr+R86dMjO5QAAP4Bn+wCAC1H+AOBCAU4HOBO/zp3kd6JtPyuoyz7jdARr2smzqzwBgU5HaJKpOe50BEu6HjzhdARL/MPCnI5giTe87R8rm2DrGdv+VwMAaHGOlP/LL7+sq666yomlAQDiyB8AXInyBwAXovwBwIUofwBwIcofAFyI8gcAF6L8AcCFKH8AcCHKHwBciPIHABei/AHAhSh/AHAhyh
8AXIjyBwAXovwBwIUofwBwIcofAFyI8gcAF6L8AcCFKH8AcCHKHwBcyNbyj46O1vbt21VeXq5NmzZpwIABdi4PAKhnW/kHBwdr9erVSk1NVVhYmN59912tWLHCruUBAKcIsGuha665RqWlpUpPT5ckPfbYY0pJSdHgwYOVl5f3vY/p1LWDXfGarUvHIKcjWNIppKPTESzxBAQ6HaFJpsa2vzZnxb9TO/nZ7No+fja7BLf972dnHzLa9lP83ZKvq6tTQUGBhgwZclr5h4SESJJey3vGrnjnvv92OgAAu4SEhKisrOyM+9hW/p07d1ZlZWWjbRUVFerUqdNp+x44cEB9+vRpMjwAoLGQkBAdOHCgyf1sK/+Kigp17Nj417tOnTqpvLz8e/e3Eh4A0JjVg2bbLvjm5eVp4MCB/7ewn5/69+//g+f7AQCtx7byz8rKUvfu3ZWcnKzAwEDNmzdPu3fv1ldffWVXBADAKYxdM2zYMLNlyxZTVlZmNmzYYPr372/b2gzDMMz/jaf+PwAALtLmbu9wrrwKOCEhQRs2bHA6hk8mTZqknTt3qrS0VDk5ORo9erTTkXySlJSk/Px8eb1ebdmyRaNGjXI6UrMMGTJElZWVioyMdDqKT1JSUlRdXS2v19swvXr1cjqWZeHh4crIyFBZWZkKCgo0bdo0pyNZlpSU1Oj77vV6ZYzR3Llzz/g4x3/9ODnBwcHm4MGDJjEx0QQGBpr58+ebjz76yPFcvoyfn5+ZM2eOqaqqMhs3bnQ8j9Xp16+fOXr0qLn66quNx+MxSUlJ5siRIyYkJMTxbFZm4MCBprS01AwbNsxIMr/+9a/Nv//9b8dz+Tr+/v7mk08+McYYExkZ6XgeX+avf/2rmTVrluM5mju5ubnm0UcfNf7+/mb48OHG6/WaiIgIx3M1Z6ZPn27y8vKa+vvrfNCTM378eLNr166Gj/38/MzRo0fN4MGDHc9mdZ566inzz3/+0zz88MPtqvxjY2NNampqo22HDx820dHRjmezOp07dzaSTFBQkLn//vvNjh07HM/k6/zhD38wixYtapflv2PHDnPVVVc5nqM5M3LkSFNQUGA8Hk/Dtosvvth06dLF8Wy+Ts+ePU1JSYmJiYlpal/nw56ce+65x6xatarRts8++8xMmTLF8Wy+fOMlmeTk5HZV/t+dmJgYU1lZabp16+Z4Fl9m6NChpra21lRVVZmxY8c6nseXiYqKMjt27DAdOnRod+UfHBxsjh8/btauXWuKi4tNbm6umTBhguO5rM7MmTPN22+/bZYsWWIOHTpkduzYYSZOnOh4ruZMWlqaeeGFF6zs63zYkzNv3jzz2muvNdq2adMmM23aNMez+TrtufwjIyPNgQMHzL333ut4Fl8nICDABAQEmNtuu82Ulpaa888/3/FMViYwMNBs27bNjBo1ykhqd+Xfp08fk52dbW644QYTGBhobrzxRuP1etvNb+3z5s0zNTU15u677zaBgYHmhhtuMF6v1wwYMMDxbL5Mjx49THl5uaXTVW3qgq+vrwJGy4uOjtamTZu0fPlyLVq0yOk4PqutrVVtba1WrFihvXv3KjY21ulIlixYsEDZ2dn6+OOPnY7SLPv371dsbKzWr1+vmpoavfPOO8rKylJcXJzT0Syprq7Wvn37tHTpUtXU1Gj9+vXauHGjxo0b53Q0nyQmJio7O1uFhYVN7tumyp9XATsrLi5O69ev17x587Rw4UKn4/hkwoQJevvttxttCwoKUmlpqUOJfJOQkKA77rhDJSUlKikpkSRt375diYmJDiezJioqSvfdd1+jbcHBwaqqqnIokW/y8/MVGhraaJu/v788Ho9DiZonPj5eK1eutLy/47+qnJwOHTqYoqIik5yc3PBsn61btzqeqznT3k779O3b15SVlZmf/exnjmdpzvTo0cOUlJSYm2++2fj7+5u77rrL7Nmzx3To0MHxbM2Z9nbaJyIiwpSXl5v4+Hjj8XhMQkKCKS
0tbbgG1tanY8eO5uDBg2bhwoXGz8/PjB8/3ni9XtO3b1/Hs1kdj8djysrKfDlV5XzoU+dceRVweyv/Z555xpw4ccJ4vd5GM2bMGMezWZ3Y2FiTm5trjh49at5//30zcOBAxzM1d9pb+Usy8fHxZufOnaa8vNx89tln7epnR/r26cLvvfeeKSkpMXl5eSY+Pt7xTL7MeeedZ4wxDc96a2p4hS8AuFCbOucPALAH5Q8ALkT5A4ALUf4A4EKUPwC4EOUPAC5E+cM1wsPDZYzRJZdc0uS+WVlZSk1NbfZaZ/t4oLVR/gDgQpQ/ALgQ5Q9XuuCCC/Tqq6/qP//5j6qrq/X111/rjjvuaLTP+eefr3Xr1qmyslJffPHFaXcITUlJUWFhobxerzZu3KiYmJjvXatLly569dVX9c0336i8vFwZGRnt9u1Jce6g/OFKr7zyii644AJdd911uvjii7VmzRotW7ZMPXr0aNgnOTlZGzdu1OWXX67Vq1crMzNTERERkqTf/OY3+v3vf6+ZM2fqiiuu0Lp16/Thhx82/PmpHn30UfXv31+xsbEaNmyY6urq9Je//MWmrxT4YY7fkIhh7Jjw8HBjjDGXXHKJufvuu02/fv0a/uxHP/qRMcaYn/70p0aSycrKMpmZmY0ev23bNvPoo48aSaawsNAkJSU1+vPMzMyGt8LMyspq+O9//OMfJjMzs+GGW7169TIjR450/PvBuHs48ocrLVu2TCNGjNDSpUuVmZnZ8J4R/v7+Dfts3ry50WO2bt2qSy65RJ07d1Z4eLjS0tLk9Xob5pprrtGgQYNOW+uJJ57QlVdeqcOHDysjI0OTJk3S559/3rpfINCEAKcDAHbzeDzKzMzUhRdeqL/97W/64IMP9MUXXyg/P7/RfidOnGj0sZ+fn44fP97wD0RycrI+++yzRvtUVlaett6nn36qiIgIxcfHKy4uTo8//rhmzpyp6OhoVVdXt/BXB1hD+cN1/Pz8dO2116p///4qKCiQJA0fPlySGr1zU1RUVKPHRUdHa9WqVSorK1NRUZEuuugivfHGGw1//swzz+hf//qXXnrppUaPe/DBB5WTk6P09HSlp6frxz/+sfLz8xUVFaWcnJzW+jKBM+K0D1ynW7duqq2t1S233KK+ffvq+uuv18svvyzp27cePGny5MmaPXu2Bg4cqNTUVEVEROi5556TJD311FNasGCBpk6dqn79+mnevHmaNWuWvvrqq9PW6927t5YsWaKRI0cqIiJCt912m0pLS0/7TQOwm+MXHhjGjjn1gu/tt99uCgoKTEVFhdm1a5eZNWuWyc3NNQ899JCRvr1g++c//9l8+OGHpqqqymzfvt2MHj264XN5PB4zd+5cU1hYaCorK83nn39uJk+e3PDnp17w7dSpk1m+fLk5dOiQqaysNJs3bzajRo1y/PvBuHt4Jy8AcCFO+wCAC1H+AOBClD8AuBDlDwAuRPkDgAtR/gDgQpQ/ALgQ5Q8ALvT/AXh605HovV+mAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Heat map of assigned cluster (x) against generated class (y).\n",
"# Bin edges sit on half-integers so every integer label/class gets its own\n",
"# cell centred on its tick, independent of which values occur in the data\n",
"# (bins=n_classes derived the edges from the observed min/max instead).\n",
"label_edges = np.arange(n_classes + 1) - 0.5\n",
"fig, ax = plt.subplots()\n",
"ax.hist2d(\n",
"    kmedoids.labels_,\n",
"    labels_array.flatten(),\n",
"    bins=label_edges,\n",
")\n",
"ax.set_title('KMedoids: cluster label vs. class')\n",
"ax.set_xlabel('labels')\n",
"ax.set_ylabel('classes')\n",
"ax.grid(False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Agglomerative clustering"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"# Average-linkage agglomerative clustering on Jaccard distance, cut at\n",
"# n_classes clusters ('complete' linkage is the commented-out alternative).\n",
"# NOTE(review): newer scikit-learn releases rename `affinity` to `metric` -\n",
"# confirm against the installed version before upgrading.\n",
"agglom = AgglomerativeClustering(\n",
"    linkage='average',\n",
"    affinity='jaccard',\n",
"    n_clusters=n_classes,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AgglomerativeClustering(affinity='jaccard', compute_full_tree='auto',\n",
" connectivity=None, linkage='average', memory=None,\n",
" n_clusters=8, pooling_func='deprecated')"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fit on the same boolean viewer-by-movie matrix used for KMedoids; the\n",
"# returned estimator is echoed and agglom.labels_ is plotted in the next cell.\n",
"agglom.fit(views_array)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEPCAYAAACqZsSmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAGd5JREFUeJzt3XtQV3X+x/HXFwS8gqzlLRUU10u1RLZC6pZ0McVEc2PdwFxq2l1bs92UbubqNnbdcGvUKR2pTduKdbZy1ZKwC6xmlqjRaoZYA95CVh2EL3FJ6/P7I+IXaXpAOEf8PB8z7xk5nC+fF5QvD+d8v+frk2QEALBKgNcBAADuo/wBwEKUPwBYiPIHAAtR/gBgIcofACzkWvmnpKTI7/c3GGOMZs2a5VYEAMD3GC9mypQppqCgwISGhnqyPsMwjM3jq/uDq7p3765PP/1UY8aM0Ycffuj28gBgPU/KPyMjQz6fT7/97W9/dJ+bek9VVUW1i6kA4Met3PVfryM4EvHzw9q/f/9p93O9/Lt166bPP/9cF198sYqLi0+6T69evbRv3z43YwHAOaN3796n/QegjUtZ6iUnJys3N/dHi1+SKioqvt23z+0c/QM4K7SKI39fRwV03VDfoafievknJiZq2bJljvatqqhWVUVVywYCACdMpdcJmpWrz/P3+XwaOnSoNm3a5OayAIAfcLX8u3Tpok6dOqmkpMTNZQEAP+DqaZ/Dhw/L5/O5uSQA4CS4vQMAWIjyBwALUf4AYCHKHwAsRPkDgIUofwCwEOUPABai/AHAQpQ/AFiI8gcAC1H+AGAhyh8ALET5A4CFKH8AsBDlDwAWovwBwEKUPwBYiPIHAAtR/gBgIcofACzk6hu4n4uyv8j3OoIjo3vGeB0BaNVaw9+h9qHtteqos31dPfKPiIhQVlaWKioqVFRUpMmTJ7u5PACgjqvlv2rVKm3dulXh4eGaNGmSlixZosjISDcjAADk4mmfYcOGKSwsTHPmzJExRnl5eYqLi9Phw4fdigAAqOPakf+ll16qTz75RAsWLNDBgwe1fft29evXT5WVlW5FAADUca38w8PDNXr0aO3evVu9e/dWWlqaMjMz1b9/f7ciAADquFb+tbW12rt3rxYtWqRjx45p3bp12rBhg0aPHu1WBABAHdfKv7CwUGFhYQ22BQYGyufzuRUBAFDHtfJ/6623VFtbq3nz5ikgIEBjxozR8OHDtXr1arciAADquPZsn+rqal111VV6+umndeTIEZWWliolJUV79+51KwIAoI6rr/AtLCzUqFGj3FwSAHAS3NsHACxE+QOAhSh/ALAQ5Q8AFqL8AcBClD8AWIjyBwALUf4AYCHKHwAsRPkDgIUofwCwEOUPABai/AHAQpQ/AFiI8gcAC1H+AGAhyh8ALOTqO3mdi0b3jPE6wjkl+4t8ryOcFv/NcS7gyB8ALET5A4CFKH8AsJCr5Z+Wlqba2lr5/f766dGjh5sRAAByufxjYmKUlpamTp061U9JSYmbEQAA8qD8P/74YzeXBACchGvlHxISooEDB+ree+9VaWmp8vPzNXbsWLeWBwB8j2vl37VrV73//vtatGiRevXqpdmzZ2vFihUaNGiQWxEAAHVce5HXvn37FB8fX//xG2+8oZycHCUkJKigoMCtGAAAuXjkHx0drXvuuafBtpCQENXU1LgVAQBQx7Xyr6io0F/+8hclJibK5/MpKSlJl19+uVauXOlWBABAHddO+xQXFys5OVmPPfaYMjMztXv3bo0fP14HDx50KwIAoI6rN3Zbs2aN1qxZ4+aSAICT4PYOAGAhyh8ALET5A4CFKH8AsBDlDwAWovwBwEKUPwBYiPIHAAtR/gBgIcofACxE+QOAhSh/ALAQ5Q8AFnL1rp6NtXLXfyVT6XWMUxrdM8brCOcUfp6AOzjyBwALUf4AYCHKHwAsRPkDgIUofwCwUJPLPy
goSJdddpk6derUnHkAAC5wXP5RUVHKzc1VXFyc2rVrp82bN2vz5s3as2eP4uLiWjIjAKCZOS7/RYsWye/3q7i4WFOmTFGvXr00cOBALV68WE8++aTjBQcPHqzq6mpFRUU1KTAA4Mw5Lv8rrrhCM2bMUGlpqW644Qa98cYb+uyzz5SRkaGYGGcvzAkMDNTzzz+vtm3bNjkwAODMOS7/mpoaBQUFqX379ho5cqSysrIkSd27d1d5ebmjrzFr1iy99957TUsKAGg2jm/vkJ2drYyMDPn9flVVVWnNmjW6+uqrtWDBAq1evfq0j4+Ojtavf/1rDR06VGlpaWcUGgBwZhwf+U+dOlVbtmxRTU2Nrr/+elVVVWno0KHKzc3VXXfddcrHBgUF6fnnn9fUqVNVU1NzxqEBAGfG8ZH/l19+eULJ//Wvf3X02Llz5yo3N1fvv/9+49IBAFpEo57n/6tf/UoffvihysrK1LdvX6Wnpzs6hZOUlKTbbrtNZWVlKisrkyRt27ZNycnJTUsNADgjjss/NTVVzzzzjFauXKng4GBJUkFBgebOnav77rvvlI8dPHiwOnfurPDwcIWHh0uShgwZoszMzDOIDgBoKsfln5aWpj/84Q96/PHH9fXXX0uSnnvuOd16662aOnVqiwUEADQ/x+f8o6KitGXLlhO25+fnq3v37o1a1OfzNWp/AEDzcnzkv2vXLl177bUnbJ80aZIKCgqaNRQAoGU5PvJ/4IEH9Morr+jnP/+52rRpo9tvv139+/fXuHHjlJSU1JIZAQDNzPGR/5tvvqnY2FiFhIRox44dGjVqlGpqanT55ZdrzZo1LZkRANDMGvUG7jt37tStt95a/3GXLl105MiRZg8FAGhZjo/8zzvvPC1btkzR0dEKCAhQVlaWSktLtXv3bg0cOLAlMwIAmpnj8n/66ac1aNAg+f1+JScna8SIERozZow2bNighQsXtmRGAEAzc3zaZ9SoUbryyitVVFSk+fPnKysrS2+//baKioqUn5/fkhkBAM3M8ZG/z+dTVVWV2rRpo2uuuab+ls4dO3ZUdXV1iwUEADQ/x0f+69evV3p6uo4ePaqgoCCtWrVKP/vZz7RgwQK9++67LZkRANDMHJf/1KlTtWTJEg0ZMkSTJk1SWVmZ7r//fvn9ft15550tEm7ioGhVVZztv1UYrwMArdraA9u8juDI2AuGeB2hWTku/4MHD+qGG25osO10N3QDAJydHJ/zb9eunR588EH1799fkrRkyRL5/X698847jb63DwDAW47Lf8GCBbr55psVHBys8ePHKzU1VTNmzFB1dTVP9QSAVsbxaZ8JEyZo3Lhx2rlzp2bNmqW33npLzz77rDZu3KhNmza1ZEYAQDNr1Gmf0tJS+Xw+jR49Wm+++aYkyRhTf39/AEDr4PjIPy8vT/fdd58OHTqk8PBwrVy5Uj169NBDDz2kDz74oCUzAgCamePynz59ul5++WVFRkZq2rRpKikp0cKFCzVw4EBNnDixJTMCAJqZ4/L/9NNPdemllzbY9sADD6iysrLZQwEAWlajbukcExOjCy+8UIGBgZK+veVDSEiILrvsMt1+++0tEhAA0Pwa9U5eDz30kCorK9WhQweVl5crLCxMkrR27doWCwgAaH6On+0zdepU3XPPPQoLC1NJSYmio6N1wQUX6IMPPlBeXl5LZgQANDPH5d+9e3e9+uqrkqT8/HwNGzZMpaWluvfeezVlyhRHXyMlJUWFhYXy+/3avHmzhg8f3rTUAIAz4rj8Dx06pC5dukiSCgsLdckll0iSDhw4oJ49e5728QMGDNDixYt10003qVOnTsrIyNArr7zSxNgAgDPhuPxXrVqlpUuX6pJLLlFOTo5+85vf6Morr9TMmTO1Z8+e0z6+sLBQPXv21LZt2xQcHKzw8HDe/xcAPOL4gu/dd9+tJ598UhdffLFeeukl3XjjjXrnnXf05ZdfKjk52dHX+PLLLxUTE6MtW7bo+P
HjSkxMbHJwAEDT+XQGN6Tv0KGDampqGnV7hzZtvv335uabb9aCBQvUv39/HTp0qME+oaGhKi8v14Tw1LP/fv6G+/kDZ4L7+Tef9qHtterocoWFhamiouKU+57yyD8hIcHxot+9rePpHD9+XJK0bNkyzZw5U/Hx8frXv/7leB0AwJk7Zfm//vrrDT42dUe5Pp+v/mOfzydjTP0R/Y8ZO3aspk2bpnHjxtVvCw4OVnl5eZOCAwCa7pQXfAMDA+snODhYjz76qO644476bVu2bNGDDz6o4ODg0y60detWjRgxQjfeeKMCAwN1xx13KCgoSOvXr2+2bwYA4IzjZ/ukp6dr8uTJ+vzzz+u3Pf3000pNTdW8efNO+/jS0lJNnDhRc+bM0ZEjRzRx4kQlJCSopqamackBAE3m+IJvSUmJJk6ceMLtm0eMGKEVK1aoV69ezRaKC76APbjg23wac8G3UW/mcrI7eJaVldXf4wcA0Do4Lv+cnBw98cQTCg8Pr98WFhamRx55RLm5uS2RDQDQQhy/yOuPf/yj3n77bR04cEB79uyRMUYRERH67LPPNGHChJbMCABoZo7Lf9++fbr44os1atQoDR48WF999ZUKCwu1bt26+qeAAgBah0a9mcuxY8e0du1a7t8PAK1co8rfdUY8mwZnnewv8r2O4MjonjFeR3CkNTyL5lzk+IIvAODcQfkDgIUofwCwEOUPABai/AHAQpQ/AFiI8gcAC1H+AGAhyh8ALET5A4CFKH8AsBDlDwAWovwBwEKUPwBYiPIHAAtR/gBgIVfLf/z48dqxY4fKy8uVl5enESNGuLk8AKCOa+Xft29fvfDCC5o2bZo6d+6sp556SqtXr1ZoaKhbEQAAdVwr/4iICGVkZGj9+vUyxujll1/WN998o0GDBrkVAQBQx7X38M3NzVVubm79x3FxcerYsaMKCwvdigAAqOPJBd+oqCi9+uqrmjNnjo4ePepFBACwmuvlHxsbq40bN2rp0qWaP3++28sDAOTiaR9JSkhIUGZmptLS0vTcc8+5uTQA4HtcK/8+ffpoxYoVuuWWW/Taa6+5tSwA4CRcO+0zY8YMdejQQcuXL5ff76+fkSNHuhUBAFDHtSP/GTNmaMaMGW4tBwA4BW7vAAAWcvWCL3AuGN0zxusIjmR/ke91BEday8/zXMORPwBYiPIHAAtR/gBgIcofACxE+QOAhSh/ALAQ5Q8AFqL8AcBClD8AWIjyBwALUf4AYCHKHwAsRPkDgIUofwCwEOUPABai/AHAQpQ/AFiI8gcAC1H+AGAhT8o/KSlJ69ev92JpAIBcLv+AgADNnDlTL774onw+n5tLAwC+x9Xyf/zxxzVhwgQ9/vjjbi4LAPgBV8v/ySef1MiRI1VUVOTmsgCAH3C1/A8ePOjmcgCAH8GzfQDAQpQ/AFiojdcB4I7sL/K9juDI6J4xXkc4Z/CzxKlw5A8AFvKk/JcvX64rrrjCi6UBAOLIHwCsRPkDgIUofwCwEOUPABai/AHAQpQ/AFiI8gcAC1H+AGAhyh8ALET5A4CFKH8AsBDlDwAWovwBwEKUPwBYiPIHAAtR/gBgIcofACxE+QOAhSh/ALAQ5Q8AFqL8AcBCrpZ/bGystm3bpsrKSm3cuFH9+/d3c3kAQB3Xyj8kJEQrV65Uenq6wsPD9eabb2rZsmVuLQ8A+J42bi101VVXqby8XJmZmZKkRx55RGlpaRo0aJAKCgpO+pj2oe3cinfu83X0OoEj7UPbex0BaLUa05mulf8PS/6bb75RUVGRBg8efEL5h4aGSpIy9y5xKx7OEquOep0AaP1CQ0NVUVFxyn1cK/8OHTqourq6wbaqqiq1b3/ikd7+/fvVu3fv04YHADQUGhqq/fv3n3Y/18q/qqpK7do1/JWkffv2qqysPOn+TsIDABpyetDs2gXfgoICDRgw4P8XDghQv379fvR8PwCg5bhW/jk5OerSpYtSU1MVFBSk2bNna/fu3dq1a5
dbEQAA32PcmiFDhpjNmzebiooKs379etOvXz/X1mYYhmH+f3x1fwAAWOSsu73DufIq4KSkJK1fv97rGI0yfvx47dixQ+Xl5crLy9OIESO8jtQoKSkpKiwslN/v1+bNmzV8+HCvIzXJ4MGDVV1draioKK+jNEpaWppqa2vl9/vrp0ePHl7HciwiIkJZWVmqqKhQUVGRJk+e7HUkx1JSUhr83P1+v4wxmjVr1ikf5/mvH99NSEiIOXDggElOTjZBQUFmzpw55r333vM8V2MmICDAzJw509TU1JgNGzZ4nsfp9O3b1xw9etRceeWVxufzmZSUFHPkyBETGhrqeTYnM2DAAFNeXm6GDBliJJnf/e535osvvvA8V2MnMDDQfPDBB8YYY6KiojzP05j5xz/+YaZPn+55jqZOfn6+efjhh01gYKAZOnSo8fv9JjIy0vNcTZkpU6aYgoKC0/399T7odzNmzBizc+fO+o8DAgLM0aNHzaBBgzzP5nSeeOIJ85///Mc8+OCDrar84+PjTXp6eoNthw4dMrGxsZ5nczodOnQwkkxwcLC59957zfbt2z3P1Nj585//bObPn98qy3/79u3miiuu8DxHU2bYsGGmqKjI+Hy++m0XXnih6dixo+fZGjvdu3c3ZWVlJi4u7nT7eh/2u7nrrrvMa6+91mDbRx99ZCZOnOh5tsb84CWZ1NTUVlX+P5y4uDhTXV1tOnfu7HmWxkxMTIw5fvy4qampMaNGjfI8T2MmOjrabN++3bRt27bVlX9ISIj56quvzJo1a0xpaanJz883Y8eO9TyX05k2bZp5/fXXzcKFC83BgwfN9u3bzbhx4zzP1ZTJyMgwzz77rJN9vQ/73cyePdu89NJLDbZt3LjRTJ482fNsjZ3WXP5RUVFm//795u677/Y8S2OnTZs2pk2bNuaWW24x5eXl5vzzz/c8k5MJCgoyW7duNcOHDzeSWl359+7d2+Tm5prrrrvOBAUFmeuvv974/f5W81v77NmzzbFjx8ydd95pgoKCzHXXXWf8fr/p37+/59kaM926dTOVlZWOTledVRd8G/sqYDS/2NhYbdy4UUuXLtX8+fO9jtNox48f1/Hjx7Vs2TLt2bNH8fHxXkdyZO7cucrNzdX777/vdZQm2bdvn+Lj47Vu3TodO3ZMb7zxhnJycpSQkOB1NEdqa2u1d+9eLVq0SMeOHdO6deu0YcMGjR492utojZKcnKzc3FwVFxefdt+zqvx5FbC3EhIStG7dOs2ePVvz5s3zOk6jjB07Vq+//nqDbcHBwSovL/coUeMkJSXptttuU1lZmcrKyiRJ27ZtU3JyssfJnImOjtY999zTYFtISIhqamo8StQ4hYWFCgsLa7AtMDBQPp/Po0RNk5iYqBUrVjje3/NfVb6btm3bmpKSEpOamlr/bJ8tW7Z4nqsp09pO+/Tp08dUVFSYX/7yl55nacp069bNlJWVmRtvvNEEBgaaO+64w3z++eembdu2nmdryrS20z6RkZGmsrLSJCYmGp/PZ5KSkkx5eXn9NbCzfdq1a2cOHDhg5s2bZwICAsyYMWOM3+83ffr08Tyb0/H5fKaioqIxp6q8D/39OVdeBdzayv+pp54yX3/9tfH7/Q1m5MiRnmdzOvHx8SY/P98cPXrUvP3222bAgAGeZ2rqtLbyl2QSExPNjh07TGVlpfnoo49a1f870rdPF37rrbdMWVmZKSgoMImJiZ5nasycd955xhhT/6y30w2v8AUAC51V5/wBAO6g/AHAQpQ/AFiI8gcAC1H+AGAhyh8ALET5wxoREREyxuiiiy467b45OTlKT09v8lpn+nigpVH+AGAhyh8ALET5w0pdu3bViy++qP/973+qra3VZ599pttuu63BPueff77Wrl2r6upqffLJJyfcITQtLU3FxcXy+/3asGGD4uLiTrpWx44d9eKLL+rw4cOqrKxUVlZWq317Upw7KH9Y6YUXXlDXrl11zTXX6MILL9Tq1a
u1ePFidevWrX6f1NRUbdiwQZdccolWrlyp7OxsRUZGSpJ+//vf609/+pOmTZumSy+9VGvXrtW7775b//nve/jhh9WvXz/Fx8dryJAh+uabb/T3v//dpe8U+HGe35CIYdyYiIgIY4wxF110kbnzzjtN37596z/3k5/8xBhjzC9+8QsjyeTk5Jjs7OwGj9+6dat5+OGHjSRTXFxsUlJSGnw+Ozu7/q0wc3Jy6v/873//22RnZ9ffcKtHjx5m2LBhnv88GLuHI39YafHixbr88su1aNEiZWdn179nRGBgYP0+mzZtavCYLVu26KKLLlKHDh0UERGhjIwM+f3++rnqqqs0cODAE9Z67LHHdNlll+nQoUPKysrS+PHj9fHHH7fsNwicRhuvAwBu8/l8ys7OVs+ePfXPf/5T77zzjj755BMVFhY22O/rr79u8HFAQIC++uqr+n8gUlNT9dFHHzXYp7q6+oT1PvzwQ0VGRioxMVEJCQl69NFHNW3aNMXGxqq2traZvzvAGcof1gkICNDVV1+tfv36qaioSJI0dOhQSWrwzk3R0dENHhcbG6vXXntNFRUVKikp0QUXXKBXXnml/vNPPfWU/vvf/+r5559v8Lj7779feXl5yszMVGZmpn7605+qsLBQ0dHRysvLa6lvEzglTvvAOp07d9bx48d10003qU+fPrr22mu1fPlySd++9eB3JkyYoBkzZmjAgAFKT09XZGSknnnmGUnSE088oblz52rSpEnq27evZs+erenTp2vXrl0nrNerVy8tXLhQw4YNU2RkpG655RaVl5ef8JsG4DbPLzwwjBvz/Qu+t956qykqKjJVVVVm586dZvr06SY/P9888MADRvr2gu3f/vY38+6775qamhqzbds2M2LEiPqv5fP5zKxZs0xxcbGprq42H3/8sZkwYUL9579/wbd9+/Zm6dKl5uDBg6a6utps2rTJDB8+3POfB2P38E5eAGAhTvsAgIUofwCwEOUPABai/AHAQpQ/AFiI8gcAC1H+AGAhyh8ALPR/UJ1uwsGTE+cAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig, ax = plt.subplots()\n",
"ax.hist2d(\n",
" agglom.labels_,\n",
" labels_array.flatten(),\n",
" bins=n_classes,\n",
")\n",
"ax.set_xlabel('labels')\n",
"ax.set_ylabel('classes')\n",
"ax.grid(False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment