Created
August 14, 2022 19:42
-
-
Save nsapoval/d92fda20ae846675cb1632ebe3eed370 to your computer and use it in GitHub Desktop.
Phylogenetic vignettes - UPGMA and Neighbor joining.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "Phylogenetic vignettes - UPGMA and Neighbor joining.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"authorship_tag": "ABX9TyPVfipZzJZN/Vey47uM5JpS", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/nsapoval/d92fda20ae846675cb1632ebe3eed370/phylogenetic-vignettes-upgma-and-neighbor-joining.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import numpy as np" | |
], | |
"metadata": { | |
"id": "FYOGmlLC09rq" | |
}, | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"#!pip install toytree\n", | |
"import toytree as tt\n", | |
"import toyplot" | |
], | |
"metadata": { | |
"id": "2Es7QS2E1CAx" | |
}, | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"id": "6A2TRsBG0xVG" | |
}, | |
"outputs": [], | |
"source": [ | |
"def UPGMA(D, cluster_sizes, clusters):\n", | |
" while D.shape != (1, 1):\n", | |
" min_ij = (-1, -1)\n", | |
" min_dist = D.sum()\n", | |
" for i in range(len(D)):\n", | |
" for j in range(i+1, len(D)):\n", | |
" if D[i, j] < min_dist:\n", | |
" min_dist = D[i, j]\n", | |
" min_ij = (i, j)\n", | |
"\n", | |
" c_ij = cluster_sizes[min_ij[0]] + cluster_sizes[min_ij[1]]\n", | |
"\n", | |
" dist_ij = []\n", | |
" for k in range(len(D)):\n", | |
" if k != min_ij[0] and k != min_ij[1]:\n", | |
" dist_ij.append(cluster_sizes[min_ij[0]] / c_ij * D[min_ij[0], k] + \\\n", | |
" cluster_sizes[min_ij[1]] / c_ij * D[min_ij[1], k])\n", | |
" dist_ij.append(0)\n", | |
"\n", | |
" del cluster_sizes[min_ij[0]]\n", | |
" del cluster_sizes[min_ij[1] - 1]\n", | |
" cluster_sizes.append(c_ij)\n", | |
" \n", | |
" clusters.append(f\"({clusters[min_ij[0]]},{clusters[min_ij[1]]})\")\n", | |
" del clusters[min_ij[0]]\n", | |
" del clusters[min_ij[1] - 1]\n", | |
"\n", | |
" D = np.delete(D, [min_ij[0], min_ij[1]], axis=0)\n", | |
" D = np.delete(D, [min_ij[0], min_ij[1]], axis=1)\n", | |
" D = np.vstack([D, dist_ij[:-1]])\n", | |
" D = np.column_stack([D, dist_ij])\n", | |
"\n", | |
" return clusters" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"D = np.asarray([[0, 32, 48, 51, 50, 48, 98, 148],\n", | |
" [32, 0, 26, 34, 29, 33, 84, 136],\n", | |
" [48, 26, 0, 42, 44, 44, 92, 152],\n", | |
" [51, 34, 42, 0, 44, 38, 86, 142],\n", | |
" [50, 29, 44, 44, 0, 24, 89, 142],\n", | |
" [48, 33, 44, 38, 24, 0, 90, 142],\n", | |
" [98, 84, 92, 86, 89, 90, 0, 148],\n", | |
" [148, 136, 152, 142, 142, 48, 98, 0],])\n", | |
"cluster_sizes = [1, 1, 1, 1, 1, 1, 1, 1]\n", | |
"clusters = [\"Dog\", \"Bear\", \"Raccoon\", \"Weasel\", \"Seal\", \"Sea lion\", \"Cat\", \"Monkey\"]\n", | |
"\n", | |
"newick = UPGMA(D, cluster_sizes, clusters)[0]+\";\"\n", | |
"tree = tt.tree(newick)\n", | |
"canvas = toyplot.Canvas(width=600, height=600, style={\"background-color\":\"white\"})\n", | |
"ax = canvas.cartesian()\n", | |
"ax.x.show = False\n", | |
"ax.y.show = False\n", | |
"tree.draw(axes=ax, tip_labels_style={\"font-size\": \"18pt\"})" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 673 | |
}, | |
"id": "6B_Az--f1Hjd", | |
"outputId": "f76f404a-f3ae-4016-a5d9-45e59ad95425" | |
}, | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"(None,\n", | |
" <toyplot.coordinates.Cartesian at 0x7f90a7351a50>,\n", | |
" <toytree.Render.ToytreeMark at 0x7f90a139d090>)" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 4 | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/html": [ | |
"<div class=\"toyplot\" id=\"tedd512a2e210400383c0c3db239608b9\" style=\"text-align:center\"><svg class=\"toyplot-canvas-Canvas\" height=\"600.0px\" id=\"t09f4ce64198f433da43d386d5c1514f5\" preserveAspectRatio=\"xMidYMid meet\" style=\"background-color:white;border-color:#292724;border-style:none;border-width:1.0;fill:rgb(16.1%,15.3%,14.1%);fill-opacity:1.0;font-family:Helvetica;font-size:12px;opacity:1.0;stroke:rgb(16.1%,15.3%,14.1%);stroke-opacity:1.0;stroke-width:1.0\" viewBox=\"0 0 600.0 600.0\" width=\"600.0px\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:toyplot=\"http://www.sandia.gov/toyplot\" xmlns:xlink=\"http://www.w3.org/1999/xlink\"><g class=\"toyplot-coordinates-Cartesian\" id=\"td0af6438873944fd97e8c7f32067684e\"><clipPath id=\"tb028d5a154a74eea8d530582637d6565\"><rect height=\"540.0\" width=\"540.0\" x=\"30.0\" y=\"30.0\"></rect></clipPath><g clip-path=\"url(#tb028d5a154a74eea8d530582637d6565)\"><g class=\"toytree-mark-Toytree\" id=\"tab2c5b0f4fba46bf8fdde359f7657a25\"><g class=\"toytree-Edges\" style=\"fill:none;stroke:rgb(14.9%,14.9%,14.9%);stroke-linecap:round;stroke-opacity:1;stroke-width:2\"><path d=\"M 51.6 137.0 L 51.6 205.1 L 117.1 205.1\" id=\"14,13\"></path><path d=\"M 117.1 205.1 L 117.1 275.2 L 182.6 275.2\" id=\"13,12\"></path><path d=\"M 182.6 275.2 L 182.6 349.5 L 248.1 349.5\" id=\"12,11\"></path><path d=\"M 248.1 349.5 L 248.1 432.1 L 313.6 432.1\" id=\"11,10\"></path><path d=\"M 313.6 432.1 L 313.6 366.0 L 379.1 366.0\" id=\"10,9\"></path><path d=\"M 313.6 432.1 L 313.6 498.1 L 379.1 498.1\" id=\"10,8\"></path><path d=\"M 51.6 137.0 L 51.6 68.9 L 117.1 68.9\" id=\"14,7\"></path><path d=\"M 117.1 205.1 L 117.1 134.9 L 182.6 134.9\" id=\"13,6\"></path><path d=\"M 182.6 275.2 L 182.6 200.9 L 248.1 200.9\" id=\"12,5\"></path><path d=\"M 248.1 349.5 L 248.1 267.0 L 313.6 267.0\" id=\"11,4\"></path><path d=\"M 379.1 366.0 L 379.1 333.0 L 444.6 333.0\" id=\"9,3\"></path><path d=\"M 379.1 366.0 L 379.1 399.1 L 444.6 399.1\" id=\"9,2\"></path><path d=\"M 379.1 498.1 L 379.1 465.1 L 444.6 465.1\" id=\"8,1\"></path><path d=\"M 379.1 498.1 L 379.1 531.1 L 444.6 531.1\" id=\"8,0\"></path></g><g class=\"toytree-TipLabels\" style=\"fill:rgb(14.9%,14.9%,14.9%);fill-opacity:1.0;font-family:helvetica;font-size:18pt;font-weight:normal;stroke:none;white-space:pre\"><g transform=\"translate(444.55,531.14)rotate(0)\"><text style=\"\" x=\"15.00\" y=\"6.13\">Raccoon</text></g><g transform=\"translate(444.55,465.10)rotate(0)\"><text style=\"\" x=\"15.00\" y=\"6.13\">Bear</text></g><g transform=\"translate(444.55,399.06)rotate(0)\"><text style=\"\" x=\"15.00\" y=\"6.13\">Sealion</text></g><g transform=\"translate(444.55,333.02)rotate(0)\"><text style=\"\" x=\"15.00\" y=\"6.13\">Seal</text></g><g transform=\"translate(313.56,266.98)rotate(0)\"><text style=\"\" x=\"15.00\" y=\"6.13\">Weasel</text></g><g transform=\"translate(248.06,200.94)rotate(0)\"><text style=\"\" x=\"15.00\" y=\"6.13\">Dog</text></g><g transform=\"translate(182.57,134.90)rotate(0)\"><text style=\"\" x=\"15.00\" y=\"6.13\">Cat</text></g><g transform=\"translate(117.07,68.86)rotate(0)\"><text style=\"\" x=\"15.00\" y=\"6.13\">Monkey</text></g></g></g></g></g></svg><div class=\"toyplot-behavior\"><script>(function()\n", | |
"{\n", | |
"var modules={};\n", | |
"})();</script></div></div>" | |
] | |
}, | |
"metadata": {} | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment