Skip to content

Instantly share code, notes, and snippets.

@OnlyBelter
Created July 24, 2020 13:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save OnlyBelter/71d399309ebe6ca974c0bc0c9ad95611 to your computer and use it in GitHub Desktop.
Save OnlyBelter/71d399309ebe6ca974c0bc0c9ad95611 to your computer and use it in GitHub Desktop.
Do hierarchical clustering with SciPy and split clusters by a distance threshold
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"np.set_printoptions(suppress=True)\n",
"import scipy\n",
"from scipy.spatial import distance\n",
"from scipy.cluster import hierarchy\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"sns.set()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sepal_length</th>\n",
" <th>sepal_width</th>\n",
" <th>petal_length</th>\n",
" <th>petal_width</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>5.1</td>\n",
" <td>3.5</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>4.9</td>\n",
" <td>3.0</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sepal_length sepal_width petal_length petal_width\n",
"0 5.1 3.5 1.4 0.2\n",
"1 4.9 3.0 1.4 0.2"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"iris = sns.load_dataset('iris').iloc[range(10), :]\n",
"species = iris.pop('species')\n",
"iris.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(10, 10)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" <th>9</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.000000</td>\n",
" <td>0.995999</td>\n",
" <td>0.999974</td>\n",
" <td>0.998168</td>\n",
" <td>0.999347</td>\n",
" <td>0.999586</td>\n",
" <td>0.998811</td>\n",
" <td>0.999538</td>\n",
" <td>0.998077</td>\n",
" <td>0.996552</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.995999</td>\n",
" <td>1.000000</td>\n",
" <td>0.996607</td>\n",
" <td>0.997397</td>\n",
" <td>0.992233</td>\n",
" <td>0.993592</td>\n",
" <td>0.990721</td>\n",
" <td>0.997118</td>\n",
" <td>0.998546</td>\n",
" <td>0.999033</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3 4 5 6 \\\n",
"0 1.000000 0.995999 0.999974 0.998168 0.999347 0.999586 0.998811 \n",
"1 0.995999 1.000000 0.996607 0.997397 0.992233 0.993592 0.990721 \n",
"\n",
" 7 8 9 \n",
"0 0.999538 0.998077 0.996552 \n",
"1 0.997118 0.998546 0.999033 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"iris_corr = iris.T.corr()\n",
"print(iris_corr.shape)\n",
"iris_corr.head(2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### distance.pdist\n",
"- Pairwise distances between observations in n-dimensional space."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0.01442885, 0.0009666 , 0.00625717, 0.00568494, 0.00320313,\n",
" 0.00792722, 0.00322728, 0.00768217, 0.01158714, 0.01377558,\n",
" 0.0098153 , 0.01727702, 0.01620735, 0.01825397, 0.01291293,\n",
" 0.00842762, 0.00425646, 0.00549239, 0.00658408, 0.0041295 ,\n",
" 0.00879105, 0.00241845, 0.00684741, 0.01085314, 0.01071777,\n",
" 0.00855472, 0.0124801 , 0.00364936, 0.00181753, 0.00603786,\n",
" 0.00272345, 0.00231928, 0.00858389, 0.01220441, 0.01515585,\n",
" 0.00496781, 0.00602494, 0.01013213, 0.0136038 , 0.01070284,\n",
" 0.0139248 , 0.01644851, 0.00508454, 0.00948891, 0.00459087])"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dis = distance.pdist(iris_corr, metric='euclidean') # pairwise Euclidean distances between the rows of iris_corr, in condensed (1-D) form\n",
"dis"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 0. , 2. , 0.0009666 , 2. ],\n",
" [ 3. , 8. , 0.00181753, 2. ],\n",
" [ 4. , 6. , 0.00231928, 2. ],\n",
" [ 7. , 10. , 0.00281043, 3. ],\n",
" [ 5. , 12. , 0.0038345 , 3. ],\n",
" [ 1. , 9. , 0.00425646, 2. ],\n",
" [11. , 13. , 0.00574414, 5. ],\n",
" [14. , 16. , 0.00838076, 8. ],\n",
" [15. , 17. , 0.01187589, 10. ]])"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"row_linkage = hierarchy.linkage(dis, method='centroid') # each row: the two cluster ids being merged, the distance between them, and the number of samples in the new cluster\n",
"row_linkage"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1, 9, 5, 4, 6, 3, 8, 7, 0, 2], dtype=int32)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"hierarchy.leaves_list(row_linkage)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 设置p的值和truncate_mode来确定展示的树形结构的深度"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAD7CAYAAABnoJM0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAcx0lEQVR4nO3df3BU1f3/8efmp2KigcxuQoOO6LfIZwiYaj5+aJRgZ5gsIlEmJaVDSlprQ0sVBG2+MCYNxUoRmyFULXxbrHagSScZqkkz4iaIw9exRIek/qAQnQlTBRPdbBKqSQiyyd7vHw777UrjJiHchZzX46+ce87dc97eNa+99+4NDsuyLERExFhRkV6AiIhEloJARMRwCgIREcMpCEREDKcgEBExnIJARMRwCgIREcPFRHoBY3HqVD+BgD2PPyQnJ9Dd3WfLXJGg+i5vE7m+iVwb2FtfVJSDyZOvGrb/sgyCQMCyLQjOzTeRqb7L20SubyLXBpdOfbo0JCJiuBEFQX19PYsWLSInJ4fKysrz+ltbW8nLy8PtdlNSUsLg4GBI//bt23n66aeD7ePHj1NQUMC9997LsmXLaG1tvcAyRERkrMIGgdfrpaKigqqqKmpra6murqatrS1kTHFxMWVlZTQ0NGBZFjU1NQD09vby6KOP8vzzz4eMLy0tpaioiLq6OtauXcv69evHsSQRERmNsEFw6NAh5s6dS1JSEpMmTcLtduPxeIL97e3tnDlzhoyMDADy8vKC/QcOHOD666/nvvvuC3nN/Px85s2bB8BNN93Exx9/PG4FiYjI6IQNgs7OTpxOZ7Dtcrnwer3D9judzmD/kiVLWLlyJdHR0SGvmZeXF9z21FNPsWDBggurQkRExizst4YCgQAOhyPYtiwrpB2ufziWZfHkk0/yzjvvsHv37lEtOjk5YVTjL5TTmWjrfHZTfZe3iVzfRK4NLp36wgZBamoqzc3NwbbP58PlcoX0+3y+YLurqyuk/z8ZHBxk/fr1eL1edu/eTWLi6P5jdHf32fa1K6czEZ+v15a5Dr7dzptHveEHjqPYuGj8Z4dsndNOqm98/c+sFO7MSLNlLjv/34sEO+uLinJ85QfosJeGsrKyaGpqoqenh4GBARobG8nOzg72p6WlER8fT0tLCwB1dXUh/f/J1q1b6evr47nnnht1CExkbx71cqJz4j5AI5e3E519tn9QEXuEPSNISUlh3bp1FBYW4vf7Wbp0KXPmzKGoqIg1a9Ywe/ZsysvLKS0tpa+vj1mzZlFYWDjs6/X09FBZWcm0adPIz88Pbq+rqxufii5z17kSWF9wi23z6VPX5c3O+rZW/t2WecR+I3qyODc3l9zc3JBtu3btCv48c+ZM9u7dO+z+q1evDv48ZcoUjh07Ntp1iojIRaIni0VEDKcgEBExnIJARMRwCgIREcMpCEREDKcgEBExnIJARMRwCgIREcMpCEREDKcgEBExnIJARMRwCgIREcMpCEREDKcgEBExnIJARMRwCgIREcMpCEREDKcgEBExnIJARMRwCgIREcMpCEREDKcgEBExnIJARMRwCgIREcMpCEREDKcgEBEx3IiCoL6+nkWLFpGTk0NlZeV5/a2treTl5eF2uykpKWFwcDCkf/v27Tz99NPB9meffcbKlSu56667KCgowOfzXWAZIiIyVmGDwOv1UlFRQVVVFbW1tVRXV9PW1hYypri4mLKyMhoaGrAsi5qaGgB6e3t59NFHef7550PGb9++nczMTF5++WXy8/PZvHnzOJYkIiKjETYIDh06xNy5c0lKSmLSpEm43W48Hk+wv729nTNnzpCRkQFAXl5esP/AgQNcf/313HfffSGvefDgQXJzcwFYvHgxr732Gn6/f9yKEhGRkQsbBJ2dnTidzmDb5XLh9XqH7Xc6ncH+JUuWsHLlSqKjo4d9zZiYGBISEujp6bmwSkREZExiwg0IBA
I4HI5g27KskHa4/pGwLIuoqJHft05OThjV618opzPRlnli46Jtne8cu+ezm+obH5F4f+rY2SNsEKSmptLc3Bxs+3w+XC5XSP+/3+zt6uoK6f9PXC4XXV1dpKamMjg4SH9/P0lJSSNedHd3H4GANeLxF8LpTMTn67VlLv/ZIQDb5gN764sE1Td+7H5/6tiNn6gox1d+gA77MTwrK4umpiZ6enoYGBigsbGR7OzsYH9aWhrx8fG0tLQAUFdXF9L/n8yfP5/a2loA9u3bR2ZmJrGxsSMqSERExlfYIEhJSWHdunUUFhayZMkSFi9ezJw5cygqKuLIkSMAlJeXs2XLFhYuXMjp06cpLCz8ytd86KGHePvtt7n77rupqqqirKxsfKoREZFRc1iWZc81lnE0US8Nba38OwDrC26xZT7Q6fflbiK/P3Xsxs8FXxoSEZGJTUEgImI4BYGIiOEUBCIihlMQiIgYTkEgImI4BYGIiOEUBCIihlMQiIgYTkEgImI4BYGIiOEUBCIihlMQiIgYTkEgImI4BYGIiOEUBCIihlMQiIgYTkEgImI4BYGIiOEUBCIihlMQiIgYTkEgImK4mEgvQGSiOPh2O28e9do6Z2xcNP6zQ7bMdaKzF4CtlX+3ZT47awP4n1kp3JmRZtt8lxKdEYiMkzePejnR2RfpZVw017kSuc6VGOllXBQnOvtsD/FLic4IRMbRda4E1hfcYtt8TmciPl+vbfPZyc7a7DrLuVTpjEBExHAjCoL6+noWLVpETk4OlZWV5/W3traSl5eH2+2mpKSEwcFBADo6OigoKGDhwoWsWrWK/v5+AD799FOKioq45557WLp0Ka2treNYkoiIjEbYIPB6vVRUVFBVVUVtbS3V1dW0tbWFjCkuLqasrIyGhgYsy6KmpgaATZs2sXz5cjweD+np6ezYsQOA559/nhkzZvDXv/6Vn/70pzz22GMXoTQRERmJsEFw6NAh5s6dS1JSEpMmTcLtduPxeIL97e3tnDlzhoyMDADy8vLweDz4/X4OHz6M2+0O2Q4QCASCZwcDAwNcccUV416YiIiMTNibxZ2dnTidzmDb5XLx7rvvDtvvdDrxer2cOnWKhIQEYmJiQrYD/PCHP2TZsmXccccd9Pf389xzz41bQSIiMjphgyAQCOBwOIJty7JC2sP1f3kcEGz/8pe/pKCggMLCQt566y3WrVvHSy+9xFVXXTWiRScnJ4xo3HhxOu35ylxsXLSt851j93x20/G7fOnY2SNsEKSmptLc3Bxs+3w+XC5XSL/P5wu2u7q6cLlcTJkyhd7eXoaGhoiOjg7Z78CBA8H7At/4xjdITk7m+PHjzJkzZ0SL7u7uIxCwRlbhBbLzK2znHp6x8+uAE/nrh6DjdznTsRs/UVGOr/wAHfYeQVZWFk1NTfT09DAwMEBjYyPZ2dnB/rS0NOLj42lpaQGgrq6O7OxsYmNjyczMZN++fQDU1tYG95s5cyavvPIKAB988AGdnZ1Mnz597FWKiMiYhQ2ClJQU1q1bR2FhIUuWLGHx4sXMmTOHoqIijhw5AkB5eTlbtmxh4cKFnD59msLCQgA2btxITU0NixYtorm5mbVr1wLwxBNP8Je//IXFixfz8MMPs3XrVhITL41TJBER04zoyeLc3Fxyc3NDtu3atSv488yZM9m7d+95+6WlpbFnz57ztl9//fXs3r17tGsVEZGLQE8Wi4gYTkEgImI4BYGIiOEUBCIihlMQiIgYTkEgImI4BYGIiOEUBCIihlMQiIgYTkEgImI4BYGIiOEUBCIihlMQiIgYTkEgImI4BYGIiOEUBCIihlMQiIgYTkEgImI4BYGIiOEUBCIihlMQiIgYTkEgImI4BYGIiOEUBCIihlMQiIgYTkEgImK4EQVBfX09ixYtIicnh8rKyvP6W1tbycvLw+12U1JSwuDgIAAdHR0UFBSwcOFCVq1aRX9/PwB9fX088sgjLFmyhCVLlnD06NFxLElERE
YjbBB4vV4qKiqoqqqitraW6upq2traQsYUFxdTVlZGQ0MDlmVRU1MDwKZNm1i+fDkej4f09HR27NgBwJYtW5g6dSq1tbU8/PDD/OIXvxj/ykREZETCBsGhQ4eYO3cuSUlJTJo0CbfbjcfjCfa3t7dz5swZMjIyAMjLy8Pj8eD3+zl8+DButztku2VZNDY2snLlSgCys7P51a9+dTFqExGREQgbBJ2dnTidzmDb5XLh9XqH7Xc6nXi9Xk6dOkVCQgIxMTEh27u7u4mLi6Oqqoply5ZRWFjI0NDQeNYkIiKjEBNuQCAQwOFwBNuWZYW0h+v/8jgAh8PB0NAQXV1dJCYmUl1dzd/+9jceeOABDhw4MOJFJycnjHjseHA6E22ZJzYu2tb5zrF7Prvp+F2+dOzsETYIUlNTaW5uDrZ9Ph8ulyuk3+fzBdtdXV24XC6mTJlCb28vQ0NDREdHB/ebPHkyMTExLF68GIDbb7+d06dP093dTXJy8ogW3d3dRyBgjbjIC+F0JuLz9doyl//sF2dGds0H9tYXCTp+ly8du/ETFeX4yg/QYS8NZWVl0dTURE9PDwMDAzQ2NpKdnR3sT0tLIz4+npaWFgDq6urIzs4mNjaWzMxM9u3bB0BtbS3Z2dnExcWRlZXFSy+9BMDbb7/NlVdeyeTJky+oUBERGZuwZwQpKSmsW7eOwsJC/H4/S5cuZc6cORQVFbFmzRpmz55NeXk5paWl9PX1MWvWLAoLCwHYuHEjGzZsYOfOnUydOpVt27YBsHnzZsrKyqiqqiImJoaKigqiovRIg4h84V//9yCfvHUY/9lBW+b7PPBfAJx8ssGW+QCGFtxJ9C3ftG2+r+KwLMueayzjaKJeGtpa+XcA1hfcYst8MLEvLYCO3+Xq5JNbONt+kri0ayO9lIvi85MnSLjxBlLXFtsyX7hLQ2HPCEREIuGq6dNt+0Vpt5NPbon0EkLoeoyIiOEUBCIihlMQiIgYTkEgImI4BYGIiOEUBCIihlMQiIgYTkEgImI4BYGIiOEUBCIihlMQiIgYTkEgImI4BYGIiOEUBCIihlMQiIgYTkEgImI4BYGIiOEUBCIihlMQiIgYTkEgImI4BYGIiOEUBCIihlMQiIgYLibSC7iUnW09SIfnMH7/oC3zDXX/FwCn6xtsmQ/gs4w74dpv2jafiFx6FARfYbCtCavnJI4p19oy39rprbbMc85Q9wn6jr5OrIJAxGgjCoL6+np27tzJ4OAg3//+9ykoKAjpb21tpaSkhP7+fjIzM9m0aRMxMTF0dHRQXFxMd3c306dPp7y8nKuuuiq43yeffMI999zDCy+8wLRp08a3snESlzKd2IXFkV7GRXG6fkuklyAil4Cw9wi8Xi8VFRVUVVVRW1tLdXU1bW1tIWOKi4spKyujoaEBy7KoqakBYNOmTSxfvhyPx0N6ejo7duwI7hMIBCgpKcHv949zSSIiMhphg+DQoUPMnTuXpKQkJk2ahNvtxuPxBPvb29s5c+YMGRkZAOTl5eHxePD7/Rw+fBi32x2y/Zxnn32WrKwsJk+ePN41iYjIKIQNgs7OTpxOZ7Dtcrnwer3D9judTrxeL6dOnSIhIYGYmJiQ7QD/+Mc/eOONN7jvvvvGrRARERmbsPcIAoEADocj2LYsK6Q9XP+XxwE4HA4GBgbYtGkTv/nNb4iKGtu3V5OTE8a032h1xJ4LsURb5rPbRK/vHLvqi42LtnW+cybi8fskbmK/Ny+1+sIGQWpqKs3NzcG2z+fD5XKF9Pt8vmC7q6sLl8vFlClT6O3tZWhoiOjo6OB+zc3NdHd3s2rVKuCLM4qVK1fyzDPPcMMNN4xo0d3dfQQC1oiLHCu/f5DY2Bh8vt6LPlckTPT6Xm9/g3d6juD3D9ky3z97vvjCQ0nDr22ZD+Bb/2suN1+dYdt8dvGfHSQ2buK+N+2uLyrK8ZUfoMN+JM/KyqKpqYmenh4GBgZobGwkOzs72J+WlkZ8fDwtLS
0A1NXVkZ2dTWxsLJmZmezbtw+A2tpasrOzmTdvHq+++ip1dXXU1dXhcrn4/e9/P+IQEBmpZu/bfPCvj2yb78b//ogb/9u++T7q6+D1Dw/bNp9MXGHPCFJSUli3bh2FhYX4/X6WLl3KnDlzKCoqYs2aNcyePZvy8nJKS0vp6+tj1qxZFBYWArBx40Y2bNjAzp07mTp1Ktu2bbvoBYn8u+uTpvHA7KJIL+Oi2P73/xPpJcgEMaLnCHJzc8nNzQ3ZtmvXruDPM2fOZO/eveftl5aWxp49e77ytV999dWRLEFERC4S/a0hERHDKQhERAynIBARMZyCQETEcAoCERHDKQhERAynIBARMZyCQETEcAoCERHDKQhERAynIBARMZyCQETEcAoCERHDKQhERAynIBARMZyCQETEcAoCERHDKQhERAynIBARMZyCQETEcAoCERHDKQhERAynIBARMZyCQETEcAoCERHDKQhERAw3oiCor69n0aJF5OTkUFlZeV5/a2sreXl5uN1uSkpKGBwcBKCjo4OCggIWLlzIqlWr6O/vB+D48eMUFBRw7733smzZMlpbW8exJBERGY2wQeD1eqmoqKCqqora2lqqq6tpa2sLGVNcXExZWRkNDQ1YlkVNTQ0AmzZtYvny5Xg8HtLT09mxYwcApaWlFBUVUVdXx9q1a1m/fv1FKE1EREYibBAcOnSIuXPnkpSUxKRJk3C73Xg8nmB/e3s7Z86cISMjA4C8vDw8Hg9+v5/Dhw/jdrtDtgPk5+czb948AG666SY+/vjjcS9MRERGJibcgM7OTpxOZ7Dtcrl49913h+13Op14vV5OnTpFQkICMTExIdvhi1A456mnnmLBggWjWnRycsKoxo9VR+y5tSfaMp/dJnp9sbHRgOq7HH0SN7Hfm5dafWGDIBAI4HA4gm3LskLaw/V/eRxw3rgnn3ySd955h927d49q0d3dfQQC1qj2GQu/f5DY2Bh8vt6LPlckTPz6hoiNjVZ9lyH/2UFi4ybwe9Pm+qKiHF/5ATrspaHU1FR8Pl+w7fP5cLlcw/Z3dXXhcrmYMmUKvb29DA0Nnbff4OAgP/vZzzhy5Ai7d+8mMfHSSEUREROFDYKsrCyampro6elhYGCAxsZGsrOzg/1paWnEx8fT0tICQF1dHdnZ2cTGxpKZmcm+ffsAqK2tDe63detW+vr6eO655xQCIiIRFvbSUEpKCuvWraOwsBC/38/SpUuZM2cORUVFrFmzhtmzZ1NeXk5paSl9fX3MmjWLwsJCADZu3MiGDRvYuXMnU6dOZdu2bfT09FBZWcm0adPIz88PzlNXV3fxqhQRkWGFDQKA3NxccnNzQ7bt2rUr+PPMmTPZu3fvefulpaWxZ8+e87YfO3ZstOsUEZGLRE8Wi4gYTkEgImI4BYGIiOEUBCIihlMQiIgYTkEgImI4BYGIiOEUBCIihlMQiIgYTkEgImI4BYGIiOEUBCIihlMQiIgYTkEgImI4BYGIiOEUBCIihlMQiIgYTkEgImI4BYGIiOEUBCIihlMQiIgYTkEgImI4BYGIiOEUBCIihlMQiIgYTkEgImK4EQVBfX09ixYtIicnh8rKyvP6W1tbycvLw+12U1JSwuDgIAAdHR0UFBSwcOFCVq1aRX9/PwCfffYZK1eu5K677qKgoACfzzeOJYmIyGiEDQKv10tFRQVVVVXU1tZSXV1NW1tbyJji4mLKyspoaGjAsixqamoA2LRpE8uXL8fj8ZCens6OHTsA2L59O5mZmbz88svk5+ezefPmi1CaiIiMREy4AYcOHWLu3LkkJSUB4Ha78Xg8PPjggwC0t7dz5swZMjIyAMjLy+Opp54iPz+fw4cP89vf/ja4/Xvf+x7FxcUcPHgweGaxePFiHnvsMfx+P7GxsSNadFSUY/SVjkF0QhLRMTG2zWe3iV5f0hVXExsbrfouQ3GTk4iJnbjvTbvrCzdP2CDo7OzE6XQG2y6Xi3fffXfYfqfTid
fr5dSpUyQkJBATExOy/cv7xMTEkJCQQE9PDykpKSMqavLkq0Y07oJ953/bM0+kTPD6iuf/ONJLuKgmcn3Jj07s9+alVl/YS0OBQACH4/+niWVZIe3h+r88Djiv/e/7REXpvrWISCSE/e2bmpoacjPX5/PhcrmG7e/q6sLlcjFlyhR6e3sZGho6bz+Xy0VXVxcAg4OD9Pf3By89iYiIvcIGQVZWFk1NTfT09DAwMEBjYyPZ2dnB/rS0NOLj42lpaQGgrq6O7OxsYmNjyczMZN++fQDU1tYG95s/fz61tbUA7Nu3j8zMzBHfHxARkfHlsCzLCjeovr6e3/3ud/j9fpYuXUpRURFFRUWsWbOG2bNn895771FaWkpfXx+zZs1iy5YtxMXF0d7ezoYNG+ju7mbq1Kls27aNa665hn/9619s2LCBkydPkpiYSHl5OdOmTbOjXhER+ZIRBYGIiExcukMrImI4BYGIiOEUBCIihlMQiIgYLuyTxSazLIsNGzYwY8YM7r///kgvZ1zt2bOHP/3pT1xxxRXceOONlJWVTahnOZ544gk8Hg/XXHMNANOnT2f79u0RXtWF+fWvf803v/lNuru7+cMf/oDD4eDKK6+kpKSE2bNn88orr/D+++/zwAMPRHqpY2JKfR988AF//vOfcTgcXHvttTz++OMkJydHtj5L/qO2tjZrxYoV1s0332w9++yzkV7OuGpqarLmzZtnffzxx5ZlWdaLL75orV69OsKrGl/f+c53rJaWlkgvY9y89dZb1o9//GPr+PHj1u233255vV7Lsizr4MGD1vz584PjfvCDH1jHjh2L0CrHzpT6jhw5Yn3rW9+yPvvsM8uyLOuJJ56wfv7znwfHRao+XRoaRmVlJfn5+SxcuDDSSxl3R48eJSsri9TUVABycnJ49dVXOXv2bIRXNj7Onj3LsWPHePbZZ8nNzWX16tV0dHREelkX5Omnn2bZsmXExcXx+OOPB5/ST09Pp6urK3jsli5dyjPPPBPJpY6JKfWlp6fT0NBAYmIin3/+OV6vN+RMPGL12R49l5n169dPuDOCw4cPW/Pnz7c++ugjy7Isa8+ePdaMGTOCn8IudydOnLB+9KMfWe+//74VCASsXbt2Wffee68VCAQivbQx+fTTT62bb77Z+vzzz0O2BwIB65FHHgk5mzt16pSVnp5uDQwM2L3MMTOxvv3791u33Xabdccdd1j//Oc/g9sjVZ/OCAyUmZnJAw88wIMPPkheXh4Oh4OkpKQJ82c+rr32Wnbt2sWMGTNwOBzcf//9nDhxgo8++ijSSxuTDz/8EKfTSVxcXHDb6dOneeihhzhx4gSPP/54cHtSUhLx8fG0t7dHYqljYmJ9CxYs4M0332T16tXcf//9BAIBIHL1KQgM1NfXx2233caLL77ICy+8wIIFCwAmzM3i9957L/i3rM6xLOuyDTqHwxH8RQFf/Mt/3/3ud4mOjmb37t1cffXVIeOjo6OJjo62e5ljZlJ9H374Ic3NzcG+b3/723R0dPDpp58Gt0WiPgWBgTo7O1mxYgV9fX0A7Ny5k7vvvnvYPxN+uYmKimLz5s2cPHkSgKqqKm666abgPZHLzXXXXUd3dzeff/45fX19rFixgpycHCoqKrjiiitCxvb29nL27Fm+9rWvRWi1o2dSfT6fj4cffpienh7gi7/j9vWvf53JkycDkatPXx810A033MDKlSvJz88nEAhw6623UlZWFulljZsZM2ZQWlrKqlWrGBoaIjU1lW3btkV6WWN29dVXc+utt/LGG2/w3nvv0dHRwf79+9m/f39wzB//+EcmT57M66+/zp133hlyGeJSZ1J98+fP5yc/+QmFhYVER0fjcrmC/4ojELn6bL0jISJj0tLSYhUVFYUdt2LFCqu1tdWGFY0v1feFSNWnS0Mil4FbbrmF6dOn89prrw07Zv/+/WRmZjJz5kwbVzY+VF9k69OfoRYRMZzOCEREDKcgEBExnIJARM
RwCgIREcMpCEREDKcgEBEx3P8Du1yNicTj3dwAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"dgram = hierarchy.dendrogram(row_linkage, p=2, truncate_mode='level')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 设置color threshold,确定用颜色区分类的个数"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAD7CAYAAABnoJM0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3df1BU56E+8GdhF4yCInQPeEFHkxvlVlCScFtK4to2DkuIRGcrMZW4ndSBG5NoNC1XJ1AsicbopWK10TQksaOBFMbqbmnIgpoxHSPNAI3GKMkEJ0aFZHcBE1mEurDn+4dft1kVd2F/6vt8/uLse3487xF89uxPhSzLMoiISFhhwQ5ARETBxSIgIhIci4CISHAsAiIiwbEIiIgExyIgIhIci4CISHDKYAcYjQsX+uBwjP7tD3FxUejutvkw0a2bIxQyhEoOZgitHKGQIVRyeJshLEyBiRPHDTt+SxaBwyF7VQRX9xEKQiFHKGQAQiMHM/xbKOQIhQxAaOTwZwY+NEREJDiPiqCurg45OTnIyspCVVXVdeNtbW3Q6XTQarUoLi7G4OCgy/jWrVuxfft25/Lp06eRn5+PBQsWYPHixWhra/NyGkRENFpui8BsNqOiogLV1dUwGAyoqalBe3u7yzpFRUUoLS1FQ0MDZFlGbW0tAKC3txfPP/88du3a5bJ+SUkJCgoKYDQasWrVKqxZs8aHUyIiopFwWwRHjx5FRkYGYmJiMHbsWGi1WphMJud4R0cHBgYGkJaWBgDQ6XTO8UOHDmHq1Kl44oknXPaZl5eHOXPmAABmzJiBr776ymcTIiKikXFbBBaLBWq12rksSRLMZvOw42q12jm+cOFCFBYWIjw83GWfOp3Oedu2bdswb94872ZBRESj5vZVQw6HAwqFwrksy7LLsrvx4ciyjM2bN+P48ePYvXv3iELHxUWNaP0bUaujvd6HL4RCjlDIAIRGDmb4t1DIEQoZgNDI4c8MbosgISEBLS0tzmWr1QpJklzGrVarc7mrq8tl/EYGBwexZs0amM1m7N69G9HRI5tgd7fNq5dSqdXRsFp7R7394WMd+PCk2f2KbqgiwmG/POT1fm71DKGSw58ZfjgzHj9OS3S7nre/m74SCjlCIUOo5PA2Q1iY4qZ3oN0+NJSZmYmmpib09PSgv78fjY2N0Gg0zvHExERERkaitbUVAGA0Gl3Gb2TTpk2w2Wx48803R1wCoeDDk2actQT/jS50azhrsfnkjgORv7i9IoiPj8fq1auh1+tht9uxaNEizJo1CwUFBVi5ciVSU1NRXl6OkpIS2Gw2zJw5E3q9ftj99fT0oKqqCklJScjLy3PebjQafTOjAJkiRWFN/r1e7eN2uKdxO+XwV4ZNVf/0+T6JfMmjdxbn5uYiNzfX5bbKykrnz8nJydi7d++w269YscL5c2xsLE6dOjXSnERE5Cd8ZzERkeBYBEREgmMREBEJjkVARCQ4FgERkeBYBEREgmMREBEJjkVARCQ4FgERkeBYBEREgmMREBEJjkVARCQ4FgERkeBYBEREgmMREBEJjkVARCQ4FgERkeBYBEREgmMREBEJjkVARCQ4FgERkeBYBEREgmMREBEJjkVARCQ4FgERkeBYBEREgvOoCOrq6pCTk4OsrCxUVVVdN97W1gadTgetVovi4mIMDg66jG/duhXbt293Ll+8eBGFhYV46KGHkJ+fD6vV6uU0iIhotNwWgdlsRkVFBaqrq2EwGFBTU4P29naXdYqKilBaWoqGhgbIsoza2loAQG9vL55//nns2rXLZf2tW7ciPT0d7777LvLy8rBhwwYfTomIiEbCbREcPXoUGRkZiImJwdixY6HVamEymZzjHR0dGBgYQFpaGgBAp9M5xw8dOoSpU6fiiSeecNnn4cOHkZubCwCYP38+/v73v8Nut/tsUkRE5Dm3RWCxWKBWq53LkiTBbDYPO65Wq5
3jCxcuRGFhIcLDw4fdp1KpRFRUFHp6erybCRERjYrS3QoOhwMKhcK5LMuyy7K7cU/IsoywMM+ft46LixrR/m9ErY4e9baqiHCv9+GLHL4SChmA0Mjhjwwj/X0JhfMAhEaOUMgAhEYOf2ZwWwQJCQloaWlxLlutVkiS5DL+3Sd7u7q6XMZvRJIkdHV1ISEhAYODg+jr60NMTIzHobu7bXA4ZI/Xv5ZaHQ2rtXfU29svDwGAV/vwRQ5fCIUMoZLDXxlG8vsSCuchVHKEQoZQyeFthrAwxU3vQLu9G56ZmYmmpib09PSgv78fjY2N0Gg0zvHExERERkaitbUVAGA0Gl3Gb2Tu3LkwGAwAgPr6eqSnp0OlUnk0ISIi8i23RRAfH4/Vq1dDr9dj4cKFmD9/PmbNmoWCggKcOHECAFBeXo6NGzciOzsbly5dgl6vv+k+n332WRw7dgwPP/wwqqurUVpa6pvZEBHRiLl9aAgAcnNzna/yuaqystL5c3JyMvbu3Tvs9itWrHBZjomJwauvvjqSnERE5Cd8ZzERkeBYBEREgmMREBEJjkVARCQ4FgERkeBYBEREgmMREBEJjkVARCQ4FgERkeBYBEREgmMREBEJjkVARCQ4FgERkeBYBEREgmMREBEJjkVARCQ4FgERkeBYBEREgmMREBEJjkVARCQ4FgERkeBYBEREglMGOwDd3g4f68CHJ81u11NFhMN+eSgAiQKf4aylFwCwqeqfQcswUv7I8cOZ8fhxWqJP90m+wSsC8qsPT5px1mILdoygmiJFY4oUHewYQXXWYvPoDgEFB68IyO+mSFFYk3/vTddRq6NhtfYGKBEzBDqHJ1dDFDy8IiAiEpxHRVBXV4ecnBxkZWWhqqrquvG2tjbodDpotVoUFxdjcHAQANDZ2Yn8/HxkZ2dj+fLl6OvrAwB8++23KCgowCOPPIJFixahra3Nh1MiIqKRcFsEZrMZFRUVqK6uhsFgQE1NDdrb213WKSoqQmlpKRoaGiDLMmprawEAZWVlWLJkCUwmE1JSUrBjxw4AwK5duzB9+nT89a9/xVNPPYUXXnjBD1MjIiJPuC2Co0ePIiMjAzExMRg7diy0Wi1MJpNzvKOjAwMDA0hLSwMA6HQ6mEwm2O12NDc3Q6vVutwOAA6Hw3l10N/fjzFjxvh8YkRE5Bm3TxZbLBao1WrnsiRJ+Pjjj4cdV6vVMJvNuHDhAqKioqBUKl1uB4Bf/vKXWLx4MR544AH09fXhzTff9NmEiIhoZNwWgcPhgEKhcC7LsuyyPNz4tesBcC6/+OKLyM/Ph16vx0cffYTVq1fjnXfewbhx4zwKHRcX5dF6N6NWj/7lfKqIcK/34YscvuLPDCM5V7f7ubiVMgC+zTHav5nb8VyEYga3RZCQkICWlhbnstVqhSRJLuNWq9W53NXVBUmSEBsbi97eXgwNDSE8PNxlu0OHDjmfF7jnnnsQFxeH06dPY9asWR6F7u62weGQPZvhDXj70rirb7Tx9uV1ofBSQX9n8PRciXAubpUM/sgxmr+Z2/VcBCNDWJjipneg3T5HkJmZiaamJvT09KC/vx+NjY3QaDTO8cTERERGRqK1tRUAYDQaodFooFKpkJ6ejvr6egCAwWBwbpecnIyDBw8CAM6cOQOLxYJp06aNepJERDR6bosgPj4eq1evhl6vx8KFCzF//nzMmjULBQUFOHHiBACgvLwcGzduRHZ2Ni5dugS9Xg8AWLduHWpra5GTk4OWlhasWrUKAPDyyy/jL3/5C+bPn4/nnnsOmzZtQnR08C+9iIhE5NE7i3Nzc5Gbm+tyW2VlpfPn5ORk7N2797rtEhMTsWfPnutunzp1Knbv3j3SrERE5Ad8ZzERkeBYBEREgmMREBEJjkVARCQ4FgERkeBYBEREgmMREBEJjkVARCQ4FgERkeBYBEREgmMREBEJjkVARCQ4FgERkeBYBEREgmMREBEJjkVARCQ4FgERkeBYBEREgmMREB
EJjkVARCQ4FgERkeBYBEREgmMREBEJjkVARCQ4FgERkeBYBEREgvOoCOrq6pCTk4OsrCxUVVVdN97W1gadTgetVovi4mIMDg4CADo7O5Gfn4/s7GwsX74cfX19AACbzYZf/epXWLhwIRYuXIiTJ0/6cEpERDQSbovAbDajoqIC1dXVMBgMqKmpQXt7u8s6RUVFKC0tRUNDA2RZRm1tLQCgrKwMS5YsgclkQkpKCnbs2AEA2LhxIyZNmgSDwYDnnnsOv/3tb30/MyIi8ojbIjh69CgyMjIQExODsWPHQqvVwmQyOcc7OjowMDCAtLQ0AIBOp4PJZILdbkdzczO0Wq3L7bIso7GxEYWFhQAAjUaDl156yR9zIyIiD7gtAovFArVa7VyWJAlms3nYcbVaDbPZjAsXLiAqKgpKpdLl9u7ubkRERKC6uhqLFy+GXq/H0NCQL+dEREQjoHS3gsPhgEKhcC7LsuyyPNz4tesBgEKhwNDQELq6uhAdHY2amhp88MEHePrpp3Ho0CGPQ8fFRXm87nDU6uhRb6uKCPd6H77I4Sv+zDCSc3W7n4tbKQPg2xyj/Zu5Hc9FKGZwWwQJCQloaWlxLlutVkiS5DJutVqdy11dXZAkCbGxsejt7cXQ0BDCw8Od202cOBFKpRLz588HANx///24dOkSuru7ERcX51Ho7m4bHA7Z40leS62OhtXaO+rt7ZevXMF4sw9f5PAFf2fw9FyJcC5ulQz+yDGav5nb9VwEI0NYmOKmd6DdPjSUmZmJpqYm9PT0oL+/H42NjdBoNM7xxMREREZGorW1FQBgNBqh0WigUqmQnp6O+vp6AIDBYIBGo0FERAQyMzPxzjvvAACOHTuGO+64AxMnThz1JImIaPTcXhHEx8dj9erV0Ov1sNvtWLRoEWbNmoWCggKsXLkSqampKC8vR0lJCWw2G2bOnAm9Xg8AWLduHdauXYudO3di0qRJ2LJlCwBgw4YNKC0tRXV1NZRKJSoqKhAWxrc0EHnr8LEOfHjS7H5FN1QR4c578b5w1nLl3uymqn8GLcNwfjgzHj9OS/T7cUKZ2yIAgNzcXOTm5rrcVllZ6fw5OTkZe/fuvW67xMRE7Nmz57rbJUnCq6++OtKsROTGhyfNOGuxYYrk/fNovjRFCv5j7Ddy1mIDABZBsAMQkW9NkaKwJv9er/ZxOzwu7omRXKHczvh4DBGR4FgERESCYxEQEQmORUBEJDgWARGR4FgERESCYxEQEQmORUBEJDgWARGR4FgERESCYxEQEQmORUBEJDgWARGR4FgERESCYxEQEQmORUBEJDgWARGR4FgERESCYxEQEQmORUBEJDgWARGR4FgERESCYxEQEQlOGewAgXa57TA6Tc2w2wdHvY+h7v8CAFyqa/Aqy8W0HwOTf+TVPoiIvCVcEQy2N0HuOQdF7ORR72PVtDavcwx1n4Xt5BGoWAREFGQeFUFdXR127tyJwcFB/OIXv0B+fr7LeFtbG4qLi9HX14f09HSUlZVBqVSis7MTRUVF6O7uxrRp01BeXo5x48Y5t/v666/xyCOPYN++fUhKSvLtzG4iIn4aVNlFATvejVyq2xjU4xMRXeX2OQKz2YyKigpUV1fDYDCgpqYG7e3tLusUFRWhtLQUDQ0NkGUZtbW1AICysjIsWbIEJpMJKSkp2LFjh3Mbh8OB4uJi2O12H0+JiIhGwm0RHD16FBkZGYiJicHYsWOh1WphMpmc4x0dHRgYGEBaWhoAQKfTwWQywW63o7m5GVqt1uX2q15//XVkZmZi4sSJvp4TERGNgNsisFgsUKvVzmVJkmA2m4cdV6vVMJvNuHDhAqKioqBUKl1uB4BPPvkE//jHP/DEE0/4bCJERDQ6bp8jcDgcUCgUzmVZll2Whxu/dj0AUCgU6O/vR1lZGX7/+98jLGx0r16Ni4sa1XYA0Km6WkzRo96HL4RKDn9nUEWEe3yM2/1cBCLDSM63P3P4ir8zeHq+bvdz4b
YIEhIS0NLS4ly2Wq2QJMll3Gq1Ope7urogSRJiY2PR29uLoaEhhIeHO7draWlBd3c3li9fDuDKFUVhYSH+8Ic/4M477/QodHe3DQ6H7PEkv8tuH4RKpYTV2juq7X0lFHIc6fgHjvecgN0+5LdjfNFz5UUAxQ3/d9P1VKpwr3Kkx6fhgcSMUW8PXPlDC/bvhbcZ7JevnENv53E7nAtPeHK+bodzERamuOkdaLd3yTMzM9HU1ISenh709/ejsbERGo3GOZ6YmIjIyEi0trYCAIxGIzQaDVQqFdLT01FfXw8AMBgM0Gg0mDNnDt577z0YjUYYjUZIkoTXXnvN4xIg32kxH8OZb8779Rh3/fd53PXf/j3GeVsnWszH/HoMotuZ2yuC+Ph4rF69Gnq9Hna7HYsWLcKsWbNQUFCAlStXIjU1FeXl5SgpKYHNZsPMmTOh1+sBAOvWrcPatWuxc+dOTJo0CVu2bPH7hGhkpsYk4enUgmDH8Ooez9Z/vurjNERi8eh9BLm5ucjNzXW5rbKy0vlzcnIy9u7de912iYmJ2LNnz033/d5773kSgYiI/ISfNUREJDgWARGR4FgERESCYxEQEQmORUBEJDgWARGR4FgERESCYxEQEQmORUBEJDgWARGR4FgERESCYxEQEQmORUBEJDgWARGR4Dz6GGoiokA6dawTZz7vdn6DmL90WWwAAGPV8F9spIoI9zrH3TMlfD/tP7zahz+xCIgo5Hx+0oJuax/i1OP8epwHpPF+3T/w77JhERARjVBC4njk5KUGO4bX3xd8s6uNUMHnCIiIBMciICISHIuAiEhwLAIiIsGxCIiIBMciICISHF8+SkF3pOMfOH7iBOz20b1p53xvJwBg6z9f9SrHT/4zA7PHp3m1D6JbEa8IKOhazMdw5pvzo94+Kfo/kBTt3Zt1zts6ceTLZq/2QXSr4hUBhYSpMUl4OrUgaMf39mqC6Fbm0RVBXV0dcnJykJWVhaqqquvG29raoNPpoNVqUVxcjMHBQQBAZ2cn8vPzkZ2djeXLl6Ovrw8AcPr0aeTn52PBggVYvHgx2trafDglIiIaCbdFYDabUVFRgerqahgMBtTU1KC9vd1lnaKiIpSWlqKhoQGyLKO2thYAUFZWhiVLlsBkMiElJQU7duwAAJSUlKCgoABGoxGrVq3CmjVr/DA1IiLyhNsiOHr0KDIyMhATE4OxY8dCq9XCZDI5xzs6OjAwMIC0tCtPsul0OphMJtjtdjQ3N0Or1brcDgB5eXmYM2cOAGDGjBn46quvfD4xIiLyjNvnCCwWC9RqtXNZkiR8/PHHw46r1WqYzWZcuHABUVFRUCqVLrcDV0rhqm3btmHevHkjCh0XFzWi9b+rU3U1T/So9+ELoZBDpQoPeoZQyREKGa7yJoMqwnfzCOq/hw/n4Quh8G/iz3PhtggcDgcUCoVzWZZll+Xhxq9dD8B1623evBnHjx/H7t27RxS6u9sGh0Me0TZX2e2DUKmUXn2aoC+EQg67fQgqVXgInIvg5wiFDID3n3R59XPzvZ2Htzm8Zb88BFVE8P89gND4N/E2Q1iY4qZ3oN0+NJSQkACr1epctlqtkCRp2PGuri5IkoTY2Fj09vZiaGjouu0GBwfx61//GidOnMDu3bsRHR0arU9EJCK3VwSZmZnYvn07enp6cMcdd6CxsREvvviiczwxMRGRkZFobW3FfffdB6PRCI1GA5VKhfT0dNTX1yM3NxcGgwEajQYAsGnTJthsNrz55puIiIjw3+yIbiHfvH8YX3/UDPvlwVHv41+O/wIAnNvc4FWWoXk/Rvi9P/JqH3TrcFsE8fHxWL16NfR6Pex2OxYtWoRZs2ahoKAAK1euRGpqKsrLy1FSUgKbzYaZM2dCr9cDANatW4e1a9di586dmDRpErZs2YKenh5UVVUhKSkJeXl5zuMYjUb/zZLoFtD7YRMud5xDROLkUe/jiTDvX4r9r3NnYf37ESSwCITh0RvKcn
NzkZub63JbZWWl8+fk5GTs3bv3uu0SExOxZ8+e624/derUSHMSCWHctGlIWFUU1AznNm8M6vEp8PgRE0REgmMREBEJjkVARCQ4FgERkeBYBEREgmMREBEJjkVARCQ4FgERkeBYBEREgmMREBEJjkVARCQ4fnk9EdEwTh3rxJnPu53fKTAaXRYbAMBYdWzU+7jnh1Mw5T9jR729O7wiICIaxucnLfi646JX+/ieFIXvSaP/VsUuiw2ffNThVQZ3eEVARHQTCYnjkZOXGrTje3Ml4SleERARCY5FQEQkOBYBEZHgWARERIJjERARCY5FQEQkOBYBEZHgWARERIJjERARCY5FQEQkOBYBEZHgWARERILzqAjq6uqQk5ODrKwsVFVVXTfe1tYGnU4HrVaL4uJiDA4OAgA6OzuRn5+P7OxsLF++HH19fQCAixcvorCwEA899BDy8/NhtVp9OCUiIhoJt0VgNptRUVGB6upqGAwG1NTUoL293WWdoqIilJaWoqGhAbIso7a2FgBQVlaGJUuWwGQyISUlBTt27AAAbN26Fenp6Xj33XeRl5eHDRs2+GFqRETkCbcfQ3306FFkZGQgJiYGAKDVamEymfDMM88AADo6OjAwMIC0tDQAgE6nw7Zt25CXl4fm5ma88sorztsff/xxFBUV4fDhw84ri/nz5+OFF16A3W6HSqXyKHRYmGLkM/3/wqNiEK5UerUPXwiFHDFjxkOlCg/6uQiFHKGQIWJiDJSq4P9uhkKOqPGRUIbA72Yo5PBFBnfbui0Ci8UCtVrtXJYkCR9//PGw42q1GmazGRcuXEBUVBSUSqXL7dduo1QqERUVhZ6eHsTHx3s0qYkTx3m03g09+r+j39aXQiBH0dz/CXYEAKGRIxQyxD0f/N8JIDRyPPbLHwQ7AoDQyBGIDG4fGnI4HFAo/t0msiy7LA83fu16AK5b/u42YWF83pqIKBjc/u+bkJDg8mSu1WqFJEnDjnd1dUGSJMTGxqK3txdDQ0PXbSdJErq6ugAAg4OD6Ovrcz70REREgeW2CDIzM9HU1ISenh709/ejsbERGo3GOZ6YmIjIyEi0trYCAIxGIzQaDVQqFdLT01FfXw8AMBgMzu3mzp0Lg8EAAKivr0d6errHzw8QEZFvKWRZlt2tVFdXhz/+8Y+w2+1YtGgRCgoKUFBQgJUrVyI1NRWffvopSkpKYLPZMHPmTGzcuBERERHo6OjA2rVr0d3djUmTJmHLli2YMGECvvnmG6xduxbnzp1DdHQ0ysvLkZSUFIj5EhHRNTwqAiIiun3xGVoiIsGxCIiIBMciICISHIuAiEhwbt9ZfLuRZRlr167F9OnTsWzZsqBk2LNnD9566y2MGTMGd911F0pLSwP+PoqXX34ZJpMJEyZMAABMmzYNW7duDWiGqw4ePIiioiJ89NFHQTn+Z599hvXr16O3txdhYWF44YUXkJKSEvAcb731Ft5++20oFApMnjwZ69evR1xcXEAzHDhwANu2bUNYWBgmTJiA9evXY8qUKQHNYDAYsGvXLudyb28vzGYz3n//fXzve98LWI7Dhw/jd7/7HS5fvowZM2bgpZdeQlRUVMCOD1x5Of4bb7wBhUKBO+64A8XFxUhNTfX9gWSBtLe3y0uXLpVnz54tv/7660HJ0NTUJM+ZM0f+6quvZFmW5f3798srVqwIeI5HH31Ubm1tDfhxr/XFF1/I8+bNk9PS0oJy/EuXLsn333+/fPjwYVmWZfnAgQOyVqsNeI4TJ07IP/nJT+SLFy/KsizLL7/8svyb3/wmoBn6+/vl2bNny2fOnJFlWZZ37dolFxQUBDTDtS5fviw/+uij8ttvvx3Q43Z3d8sZGRnyF198IcuyLG/evFlet25dQDOcPn1avv/++2Wz2SzLsiwfPnxYnjt3rl+OJdRDQ1VVVcjLy0N2dnbQMpw8eRKZmZlISEgAAGRlZeG9997D5cuXA5bh8uXLOHXqFF5//XXk5uZixY
oV6OzsDNjxr+rv70dRURHWrl0b8GNf9cEHH2Dy5MmYO3cuAODBBx8MypVRSkoKGhoaEB0djX/9618wm80Bv0ocGhqCLMvo7e0FAPT19SEyMjKgGa5VWVmJ2NhYPPbYYwE97pEjR5CamoqpU6cCAH7+85+jrq4OcgBfbR8REYH169c7P5EhJSUFXV1dfvm/QqiHhkpLSwFc+eMPltmzZ2PPnj3o6OhAYmIi9u3bB7vdjm+++cblozv8yWw2IyMjA6tWrcLdd9+NN954A0899RT2798/7OdB+UNpaSkWL16MGTNmBOyY1/riiy+gVqvx/PPP49NPP8X48eNRVFQUlCwqlQoHDx5EcXExIiIisHLlyoAef9y4cSgrK8Njjz2GmJgYOBwOvP322wHN8F09PT3YtWsX9u3bF/Bjf/311847a8CVj9Kx2Wzo6+sL2MNDSUlJzjfayrKMjRs34qc//SkiIiJ8fiyhrghCQXp6Op5++mk888wz0Ol0UCgUiImJCehHbEyePBmVlZWYPn06FAoFli1bhrNnz+L8+fMBy1BVVQWlUolFixYF7Jg3Mjg4iPfffx+LFy/Gvn378Pjjj6OwsDCgV2jfNW/ePHz44YdYsWIFli1bBofDEbBjf/bZZ3jllVdQX1+PI0eO4Mknn8SKFSsCei/4u2pra/Hggw9i8uTJAT/2tR+meVUwPhzz0qVLePbZZ3H27FmsX7/eL8dgEQSYzWbDD37wA+zfvx/79u3DvHnzACCgDwN8+umnzs96ukqW5YCW0f79+3HixAksWLAAhYWFGBgYwIIFC5wfVR4okiThrrvuwuzZswFc+Y94aGgI586dC2iOL7/8Ei0tLc7ln/3sZ+js7MS3334bsAxHjhzBvffe63xyOD8/H59//jkuXLgQsAzfVV9fD51OF5RjT5o0CRaLxblsNpsxYcIEjB07NqA5Ojs78dhjjyE8PBy7d+/G+PHj/XIcFkGAWSwWLF26FDabDQCwc+dOPPzwwwF9SCYsLAwbNmxw/mdXXV2NGTNmuFwK+9vevXvxt7/9DUajEa+99hrGjBkDo9Ho8XdS+IpGo8H58+fxySefAACam5uhUCgC/tlXVqsVzz33HHp6egBc+Xyvu+++GyQ+OK0AAAFESURBVBMnTgxYhu9///tobm52fjLwwYMHkZSUhNjY2IBluOrbb7/F2bNncc899wT82ADwwAMP4Pjx4zhz5gwA4M9//jMefPDBgGaw2WxYunQpsrKyUFFRgTFjxvjtWEI9RxAK7rzzThQWFiIvLw8OhwP33Xef87mLQJk+fTpKSkqwfPlyDA0NISEhAVu2bAlohlChVqvxyiuvoKysDP39/YiIiMD27dsD/iRpeno6nnzySej1eoSHh0OSJOe3+wXKj370IyxbtgxLly6FSqXChAkTnF8vG2hffvkl1Gp10D6VOC4uDhs3bsTKlStht9sxZcoUbNq0KaAZqqqq0NnZiQMHDuDAgQPO2//0pz/5/A4CP3SOiEhwfGiIiEhwLAIiIsGxCIiIBMciICISHIuAiEhwLAIiIsGxCIiIBMciICIS3P8DHTcE7/4IXWIAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"dgram = hierarchy.dendrogram(row_linkage, color_threshold=0.005)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'icoord': [[5.0, 5.0, 15.0, 15.0],\n",
" [35.0, 35.0, 45.0, 45.0],\n",
" [25.0, 25.0, 40.0, 40.0],\n",
" [55.0, 55.0, 65.0, 65.0],\n",
" [85.0, 85.0, 95.0, 95.0],\n",
" [75.0, 75.0, 90.0, 90.0],\n",
" [60.0, 60.0, 82.5, 82.5],\n",
" [32.5, 32.5, 71.25, 71.25],\n",
" [10.0, 10.0, 51.875, 51.875]],\n",
" 'dcoord': [[0.0, 0.004256456009326175, 0.004256456009326175, 0.0],\n",
" [0.0, 0.0023192825225930593, 0.0023192825225930593, 0.0],\n",
" [0.0, 0.003834496320219779, 0.003834496320219779, 0.0023192825225930593],\n",
" [0.0, 0.001817534593237589, 0.001817534593237589, 0.0],\n",
" [0.0, 0.0009665986618332519, 0.0009665986618332519, 0.0],\n",
" [0.0, 0.0028104314844498227, 0.0028104314844498227, 0.0009665986618332519],\n",
" [0.001817534593237589,\n",
" 0.005744138760901555,\n",
" 0.005744138760901555,\n",
" 0.0028104314844498227],\n",
" [0.003834496320219779,\n",
" 0.008380764348347213,\n",
" 0.008380764348347213,\n",
" 0.005744138760901555],\n",
" [0.004256456009326175,\n",
" 0.011875889940371007,\n",
" 0.011875889940371007,\n",
" 0.008380764348347213]],\n",
" 'ivl': ['1', '9', '5', '4', '6', '3', '8', '7', '0', '2'],\n",
" 'leaves': [1, 9, 5, 4, 6, 3, 8, 7, 0, 2],\n",
" 'color_list': ['C1', 'C2', 'C2', 'C3', 'C4', 'C4', 'C0', 'C0', 'C0']}"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dgram"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def get_node_id(node):\n",
"    \"\"\"Return the ids of all leaves (original observations) under ``node``.\n",
"\n",
"    node: a scipy.cluster.hierarchy.ClusterNode, e.g. the root returned by\n",
"        hierarchy.to_tree or any node below it.\n",
"    Returns a list of leaf ids ordered left to right; a leaf gives ``[node.id]``.\n",
"    \"\"\"\n",
"    assert isinstance(node, scipy.cluster.hierarchy.ClusterNode)\n",
"    # ClusterNode.pre_order walks the subtree without recursive calls, so a very\n",
"    # deep (chain-like) dendrogram cannot hit Python's recursion limit.\n",
"    return node.pre_order(lambda leaf: leaf.id)\n",
"\n",
"def split_tree_by_distance(node, threshold):\n",
"    \"\"\"Cut the subtree rooted at ``node`` into sub-clusters by merge distance.\n",
"\n",
"    node: a scipy.cluster.hierarchy.ClusterNode (e.g. from hierarchy.to_tree).\n",
"    threshold: a float; descent stops at the first node whose ``dist`` is\n",
"        less than or equal to it.\n",
"    Returns a list of ClusterNode objects ordered left to right, each one the\n",
"    root of a sub-cluster. A leaf always forms its own single-sample cluster.\n",
"    \"\"\"\n",
"    assert isinstance(node, scipy.cluster.hierarchy.ClusterNode)\n",
"    subcluster_roots = []\n",
"    pending = [node]  # explicit stack instead of recursion, safe for deep trees\n",
"    while pending:\n",
"        current = pending.pop()\n",
"        # A leaf cannot be split further. It is collected like any other root:\n",
"        # returning the bare node (not a list) made `list + node` raise TypeError\n",
"        # as soon as the cut isolated a single sample.\n",
"        if current.count < 2 or current.dist <= threshold:\n",
"            subcluster_roots.append(current)\n",
"        else:\n",
"            # push the right child first so the left subtree is handled first\n",
"            pending.append(current.right)\n",
"            pending.append(current.left)\n",
"    return subcluster_roots\n",
"\n",
"def cut_cluster_by_distance(linkage_matrix, threshold):\n",
"    \"\"\"\n",
"    Assign every observation to a sub-cluster by cutting the tree at a distance.\n",
"\n",
"    linkage_matrix: the matrix returned by hierarchy.linkage, see\n",
"        https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html#scipy.cluster.hierarchy.linkage\n",
"    threshold: a float, max distance of clusters in the tree at the cut point\n",
"    Returns a dict mapping each leaf id to the 0-based index of its sub-cluster,\n",
"    numbered in the order split_tree_by_distance yields the sub-cluster roots.\n",
"    \"\"\"\n",
"    tree_root = hierarchy.to_tree(linkage_matrix)\n",
"    cluster_roots = split_tree_by_distance(tree_root, threshold=threshold)\n",
"    return {\n",
"        leaf_id: cluster_index\n",
"        for cluster_index, cluster_root in enumerate(cluster_roots)\n",
"        for leaf_id in get_node_id(cluster_root)\n",
"    }"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{1: 0, 9: 0, 5: 1, 4: 1, 6: 1, 3: 2, 8: 2, 7: 3, 0: 3, 2: 3}"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cut_cluster_by_distance(row_linkage, 0.005)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.10"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment