Skip to content

Instantly share code, notes, and snippets.

@hardingnj
Created October 2, 2018 08:51
Show Gist options
  • Save hardingnj/9f67bae7259945d35d9f41601d7feac5 to your computer and use it in GitHub Desktop.
Save hardingnj/9f67bae7259945d35d9f41601d7feac5 to your computer and use it in GitHub Desktop.
Dendrograms
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Defining colours\n",
"# Each palette is a list of hex colours. The trailing comment on an entry\n",
"# names the category that the entry is listed for.\n",
"mycol_regions = [\n",
"    '#00cc00',  # ETH\n",
"    '#ffb266',  # LA\n",
"    '#BF7DFF',  # NEKH\n",
"    '#cc6600',  # NKH\n",
"    '#e31a1c',  # VN\n",
"    '#0331E9',  # WKH\n",
"]\n",
"\n",
"mycol_clades = [\n",
"    '#e0e0e0',  # -\n",
"    '#0331E9',  # A\n",
"    '#e31a1c',  # B\n",
"    '#ffb266',  # C\n",
"    '#BF7DFF',  # D\n",
"    '#00cc00',  # E\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def plot_dendrogram(dist, nhaps, ax, method='complete', color_threshold=0, above_threshold_color='k'):\n",
"    \"\"\"Cluster `dist` hierarchically and draw the dendrogram on `ax`.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    dist : array-like\n",
"        Distances to cluster. Presumably a condensed pairwise distance\n",
"        vector, since it is handed to `distance.num_obs_y` -- confirm.\n",
"    nhaps : int\n",
"        Count used for the x-axis labels: 0, 200, 400, ... and finally `nhaps`.\n",
"    ax : matplotlib axes\n",
"        Axes that receive the dendrogram and all axis decorations.\n",
"    method : {'complete', 'single'}\n",
"        Linkage method; any other value raises KeyError.\n",
"    color_threshold, above_threshold_color\n",
"        Forwarded unchanged to `scipy.cluster.hierarchy.dendrogram`.\n",
"\n",
"    Returns\n",
"    -------\n",
"    z, r\n",
"        The output of `_hierarchy.linkage` and the value returned by\n",
"        `scipy.cluster.hierarchy.dendrogram`.\n",
"    \"\"\"\n",
"    # Prepare the inputs for the low-level linkage call: a C-ordered array\n",
"    # passed through the conversion helper, plus the number of observations.\n",
"    condensed = _convert_to_double(np.asarray(dist, order='c'))\n",
"    n_obs = int(distance.num_obs_y(dist))\n",
"\n",
"    # `_hierarchy.linkage` is called with an integer method code, not a name.\n",
"    method_code = {'single': 0, 'complete': 1}[method]\n",
"    z = _hierarchy.linkage(condensed, n_obs, method_code)\n",
"\n",
"    # Draw the tree; the spine is kept on top because the x label goes there.\n",
"    sns.despine(ax=ax, offset=5, bottom=True, top=False)\n",
"    r = scipy.cluster.hierarchy.dendrogram(\n",
"        z,\n",
"        no_labels=True,\n",
"        count_sort=True,\n",
"        color_threshold=color_threshold,\n",
"        above_threshold_color=above_threshold_color,\n",
"        ax=ax,\n",
"    )\n",
"\n",
"    # Relabel the x axis: labels run from 0 to nhaps and are stretched\n",
"    # linearly over the interval occupied by the plotted data.\n",
"    xmin, xmax = ax.xaxis.get_data_interval()\n",
"    tick_labels = np.array([*range(0, nhaps, 200), nhaps])\n",
"    tick_positions = xmin + (tick_labels / nhaps) * (xmax - xmin)\n",
"    ax.set_xticks(tick_positions)\n",
"    ax.set_xticklabels(tick_labels)\n",
"    ax.set_xlabel('ESEA KEL1/PLA1 samples')\n",
"    ax.xaxis.set_label_position('top')\n",
"\n",
"    ax.set_ylabel('Genetic distance')\n",
"    # Start the y axis slightly below zero.\n",
"    ax.set_ylim(bottom=-0.0001)\n",
"\n",
"    ax.autoscale(axis='x', tight=True)\n",
"    return z, r"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def make_color_bar(meta, variable, ax, ix, pal=None):\n",
"    \"\"\"Draw a one-row colour bar for a metadata column on `ax`.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    meta : pandas.DataFrame\n",
"        Table holding the column to display.\n",
"    variable : str\n",
"        Name of the column in `meta`; also used as the y-axis label.\n",
"    ax : matplotlib axes\n",
"        Axes to draw the bar on.\n",
"    ix : sequence of int\n",
"        Positional row indices (passed to `meta.iloc`) giving the\n",
"        left-to-right order of the bar segments.\n",
"    pal : sequence of colours, optional\n",
"        Colours paired by position with the categories of the column.\n",
"        pandas infers categories in sorted order, so the palette must follow\n",
"        the sorted category values. Defaults to `mycol_clades` for the\n",
"        'K1P_clade' column and to `mycol_regions` for any other column.\n",
"\n",
"    Returns\n",
"    -------\n",
"    clrs, values\n",
"        The category -> colour dict and the `pd.Categorical` of the\n",
"        reordered column.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If the column has more categories than the palette has colours.\n",
"    \"\"\"\n",
"    x = meta.iloc[ix]\n",
"    values = pd.Categorical(x[variable])\n",
"\n",
"    if pal is None:\n",
"        # The clade column has its own palette; it used to be drawn with the\n",
"        # region colours because the palette was hard-wired.\n",
"        pal = mycol_clades if variable == 'K1P_clade' else mycol_regions\n",
"\n",
"    # zip() below would silently drop the surplus categories and the colour\n",
"    # lookup would then fail with a bare KeyError, so fail early instead.\n",
"    if len(values.categories) > len(pal):\n",
"        raise ValueError(\n",
"            'Column {!r} has {} categories but the palette has only {} colours'.format(\n",
"                variable, len(values.categories), len(pal)))\n",
"\n",
"    clrs = dict(zip(values.categories, pal))\n",
"    n_rows = x.shape[0]\n",
"\n",
"    # One unit-wide segment per row, coloured by that row's category.\n",
"    ax.broken_barh(\n",
"        xranges=[(i, 1) for i in range(n_rows)],\n",
"        yrange=(0, 1),\n",
"        color=[clrs[v] for v in values])\n",
"\n",
"    # Strip all axis furniture; only the column name remains as a label.\n",
"    sns.despine(ax=ax, offset=5, left=True, bottom=True)\n",
"    ax.set_xticks([])\n",
"    ax.set_yticks([])\n",
"    ax.set_xlim(0, n_rows)\n",
"    ax.yaxis.set_label_position('left')\n",
"    ax.set_ylabel(variable, rotation=0, ha='right', va='center')\n",
"\n",
"    return clrs, values"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Layout: a tall dendrogram row followed by thinner rows for the colour bars.\n",
"# The grid has four rows; only the first three are filled below.\n",
"fig = plt.figure(figsize=(10, 8))\n",
"gs = GridSpec(4, 1, height_ratios=(3, 1, 1, 1))\n",
"\n",
"ax1 = plt.subplot(gs[0])\n",
"zz, rr = plot_dendrogram(\n",
"    pdist_np_select_linear,\n",
"    df_meta_matched.shape[0],\n",
"    ax1,\n",
")\n",
"\n",
"# One colour bar per metadata column, with rows taken in the order of the\n",
"# dendrogram leaves, each with its own legend.\n",
"for i, x in enumerate([\"RegionCode\", \"K1P_clade\"]):\n",
"    ax2 = plt.subplot(gs[i + 1])\n",
"    col_dict, v = make_color_bar(df_meta_matched, x, ax2, rr[\"leaves\"])\n",
"\n",
"    # Legend entries are built by hand: one patch per category.\n",
"    handles = [\n",
"        mpl.patches.Patch(color=colour, label=category)\n",
"        for category, colour in col_dict.items()\n",
"    ]\n",
"\n",
"    ax2.legend(handles=handles, loc=0, bbox_to_anchor=(1, 1), ncol=6)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment