Skip to content

Instantly share code, notes, and snippets.

@golobor
Last active October 29, 2020 07:49
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save golobor/2d0dbeb331899aa1bb68912c751f2f4f to your computer and use it in GitHub Desktop.
Save golobor/2d0dbeb331899aa1bb68912c751f2f4f to your computer and use it in GitHub Desktop.
centromere_downloaded
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"toc": "true"
},
"cell_type": "markdown",
"source": "# Table of Contents\n <p><div class=\"lev1\"><a href=\"#Centromere-parsing-and-fetching-code\"><span class=\"toc-item-num\">1&nbsp;&nbsp;</span>Centromere parsing and fetching code</a></div><div class=\"lev1\"><a href=\"#A-user-oriented-centromere-fetching-function\"><span class=\"toc-item-num\">2&nbsp;&nbsp;</span>A user-oriented centromere fetching function</a></div>"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2019-01-06T05:10:05.235273Z",
"start_time": "2019-01-06T05:10:05.219857Z"
},
"collapsed": true,
"trusted": true
},
"cell_type": "code",
"source": "%config Completer.use_jedi = False",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2019-01-06T05:10:07.333070Z",
"start_time": "2019-01-06T05:10:06.281643Z"
},
"collapsed": true,
"trusted": true
},
"cell_type": "code",
"source": "import numpy as np\nimport pandas as pd\nimport bioframe, bioframe.io, bioframe.io.formats\nfrom bioframe.io.formats import read_gapfile\nfrom bioframe.io.resources import fetch_gaps",
"execution_count": 2,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2019-01-06T04:19:56.252851Z",
"start_time": "2019-01-06T04:19:27.071671Z"
}
},
"cell_type": "markdown",
"source": "# Centromere parsing and fetching code"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2019-01-06T05:10:08.622643Z",
"start_time": "2019-01-06T05:10:08.613964Z"
},
"collapsed": true,
"trusted": true
},
"cell_type": "code",
"source": "CENTROMERES_TXT_FIELDS = [\n 'bin',\n 'chrom',\n 'start',\n 'end',\n 'name']\n\n\ndef read_centxt(filepath_or_fp, chroms=None, **kwargs):\n centxt = pd.read_csv(\n filepath_or_fp,\n sep='\\t',\n names=CENTROMERES_TXT_FIELDS,\n **kwargs)\n if chroms is not None:\n centxt = centxt[centxt.chrom.isin(chroms)]\n return centxt\n\n\ndef fetch_centxt(db,**kwargs):\n return read_centxt(\n 'http://hgdownload.cse.ucsc.edu/goldenPath/{}/database/centromeres.txt.gz'.format(db),\n **kwargs)\n\n\ndef centromeres_from_centxt(centxt_df, merge=True):\n cens = centxt_df.copy()\n \n if merge:\n cens = cens.groupby('chrom').agg({'start':np.min,\n 'end':np.max}).reset_index()\n\n cens['mid'] = (cens['start'] + cens['end'])//2\n \n cens = (cens[['chrom','start','end','mid']]\n .sort_values('chrom')\n .reset_index(drop=True))\n \n return cens\n\ndef fetch_centromeres_centxt(db):\n centxt_df = fetch_centxt(db)\n return centromeres_from_centxt(centxt_df)",
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2019-01-06T05:26:02.682939Z",
"start_time": "2019-01-06T05:26:02.671274Z"
},
"collapsed": true,
"trusted": true
},
"cell_type": "code",
"source": "def centromeres_from_gap(gap_df, merge=True):\n cens=gap_df[gap_df['type']=='centromere']\n \n if merge:\n cens = cens.groupby('chrom').agg({'start':np.min,\n 'end':np.max}).reset_index()\n\n cens['mid'] = (cens['start'] + cens['end'])//2\n cens = (cens[['chrom','start','end','mid']]\n .sort_values('chrom')\n .reset_index(drop=True))\n \n return cens\n\ndef fetch_centromeres_gap(db):\n gap_df=fetch_gaps(db)\n return centromeres_from_gap(gap_df)\n\n\nCYTOBAND_FIELDS = [\n 'chrom',\n 'start',\n 'end',\n 'name',\n 'gieStain']\n\ndef read_cytoband(filepath_or_fp, chroms=None, **kwargs):\n cytoband = pd.read_csv(\n filepath_or_fp,\n sep='\\t',\n names=CYTOBAND_FIELDS,\n **kwargs)\n if chroms is not None:\n cytoband = cytoband[cytoband.chrom.isin(chroms)]\n return cytoband\n\ndef fetch_cytoband(db, ideo=True, **kwargs):\n if ideo:\n return read_cytoband(\n 'http://hgdownload.cse.ucsc.edu/goldenPath/{}/database/cytoBandIdeo.txt.gz'.format(db),\n **kwargs)\n else:\n return read_cytoband(\n 'http://hgdownload.cse.ucsc.edu/goldenPath/{}/database/cytoBand.txt.gz'.format(db),\n **kwargs)\n \ndef centromeres_from_cytoband(cytoband_df, merge=True):\n cens = cytoband_df[cytoband_df.gieStain=='acen']\n \n if merge:\n cens = cens.groupby('chrom').agg({'start':np.min,\n 'end':np.max}).reset_index()\n\n cens['mid'] = (cens['start'] + cens['end'])//2\n \n cens = (cens[['chrom','start','end','mid']]\n .sort_values(['chrom','start'])\n .reset_index(drop=True))\n return cens\n\ndef fetch_centromeres_cytoband(db,ideo=True):\n cytoband_df = fetch_cytoband(db,ideo)\n return centromeres_from_cytoband(cytoband_df)",
"execution_count": 19,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2019-01-06T05:26:08.736009Z",
"start_time": "2019-01-06T05:26:04.934372Z"
},
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": 20,
"outputs": [
{
"data": {
"text/html": "<div>\n<style>\n .dataframe thead tr:only-child th {\n text-align: right;\n }\n\n .dataframe thead th {\n text-align: left;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>chrom</th>\n <th>start</th>\n <th>end</th>\n <th>mid</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>chr1</td>\n <td>121236957</td>\n <td>123476957</td>\n <td>122356957</td>\n </tr>\n <tr>\n <th>1</th>\n <td>chr10</td>\n <td>39244941</td>\n <td>41624941</td>\n <td>40434941</td>\n </tr>\n <tr>\n <th>2</th>\n <td>chr11</td>\n <td>51450781</td>\n <td>54450781</td>\n <td>52950781</td>\n </tr>\n <tr>\n <th>3</th>\n <td>chr12</td>\n <td>34747961</td>\n <td>36142961</td>\n <td>35445461</td>\n </tr>\n <tr>\n <th>4</th>\n <td>chr13</td>\n <td>16000000</td>\n <td>17868000</td>\n <td>16934000</td>\n </tr>\n <tr>\n <th>5</th>\n <td>chr14</td>\n <td>15070000</td>\n <td>18070000</td>\n <td>16570000</td>\n </tr>\n <tr>\n <th>6</th>\n <td>chr15</td>\n <td>15260000</td>\n <td>18260000</td>\n <td>16760000</td>\n </tr>\n <tr>\n <th>7</th>\n <td>chr16</td>\n <td>35143302</td>\n <td>36943302</td>\n <td>36043302</td>\n </tr>\n <tr>\n <th>8</th>\n <td>chr17</td>\n <td>22187133</td>\n <td>22287133</td>\n <td>22237133</td>\n </tr>\n <tr>\n <th>9</th>\n <td>chr18</td>\n <td>15400898</td>\n <td>16764896</td>\n <td>16082897</td>\n </tr>\n <tr>\n <th>10</th>\n <td>chr19</td>\n <td>26923622</td>\n <td>29923622</td>\n <td>28423622</td>\n </tr>\n <tr>\n <th>11</th>\n <td>chr2</td>\n <td>91689898</td>\n <td>94689898</td>\n <td>93189898</td>\n </tr>\n <tr>\n <th>12</th>\n <td>chr20</td>\n <td>26267569</td>\n <td>28033230</td>\n <td>27150399</td>\n </tr>\n <tr>\n <th>13</th>\n <td>chr21</td>\n <td>10260000</td>\n <td>13260000</td>\n <td>11760000</td>\n </tr>\n <tr>\n <th>14</th>\n <td>chr3</td>\n <td>90587544</td>\n <td>93487544</td>\n <td>92037544</td>\n </tr>\n <tr>\n <th>15</th>\n <td>chr4</td>\n <td>49354874</td>\n <td>52354874</td>\n <td>50854874</td>\n </tr>\n <tr>\n <th>16</th>\n <td>chr5</td>\n <td>46441398</td>\n <td>49441398</td>\n <td>47941398</td>\n </tr>\n <tr>\n <th>17</th>\n <td>chr6</td>\n <td>58938125</td>\n <td>61938125</td>\n <td>60438125</td>\n </tr>\n <tr>\n <th>18</th>\n <td>chr7</td>\n <td>58058273</td>\n <td>61058273</td>\n <td>59558273</td>\n </tr>\n <tr>\n <th>19</th>\n <td>chr8</td>\n <td>43958052</td>\n <td>46958052</td>\n <td>45458052</td>\n </tr>\n <tr>\n <th>20</th>\n <td>chr9</td>\n <td>47107499</td>\n <td>50107499</td>\n <td>48607499</td>\n </tr>\n <tr>\n <th>21</th>\n <td>chrX</td>\n <td>58598737</td>\n <td>61598737</td>\n <td>60098737</td>\n </tr>\n <tr>\n <th>22</th>\n <td>chrY</td>\n <td>11253954</td>\n <td>12308578</td>\n <td>11781266</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " chrom start end mid\n0 chr1 121236957 123476957 122356957\n1 chr10 39244941 41624941 40434941\n2 chr11 51450781 54450781 52950781\n3 chr12 34747961 36142961 35445461\n4 chr13 16000000 17868000 16934000\n5 chr14 15070000 18070000 16570000\n6 chr15 15260000 18260000 16760000\n7 chr16 35143302 36943302 36043302\n8 chr17 22187133 22287133 22237133\n9 chr18 15400898 16764896 16082897\n10 chr19 26923622 29923622 28423622\n11 chr2 91689898 94689898 93189898\n12 chr20 26267569 28033230 27150399\n13 chr21 10260000 13260000 11760000\n14 chr3 90587544 93487544 92037544\n15 chr4 49354874 52354874 50854874\n16 chr5 46441398 49441398 47941398\n17 chr6 58938125 61938125 60438125\n18 chr7 58058273 61058273 59558273\n19 chr8 43958052 46958052 45458052\n20 chr9 47107499 50107499 48607499\n21 chrX 58598737 61598737 60098737\n22 chrY 11253954 12308578 11781266"
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2019-01-06T05:11:38.939102Z",
"start_time": "2019-01-06T05:11:38.586560Z"
},
"trusted": true,
"collapsed": true
},
"cell_type": "code",
"source": "import io\n\nCENTROMERES = {\n 'hg18':centromeres_from_gap(\n pd.concat(\n [read_gapfile(f'http://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/chr{chrom}_gap.txt.gz')\n for chrom in list(range(1,22))+['X','Y']]\n ),\n 'hg19':fetch_centromeres_cytoband('hg19', ideo=True),\n 'hg38':fetch_centromeres_centxt('hg38'),\n ''\n)\n\n \n \n# 'mm9'\n# 'mm10':pd.read_csv(io.StringIO(\n# '''\n# '''\n# 'dm3'\n# 'dm6'\n# 'GRCm38'\n \n# 'ce10'\n# 'galGal4'\n# 'galGal5'\n# 'galGal6'\n \n# 'pombe'\n# 'w303'\n# 'sk1'\n# 'sacCer3'\n}",
"execution_count": 9,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2019-01-06T05:10:13.655581Z",
"start_time": "2019-01-06T05:10:13.321380Z"
},
"trusted": true
},
"cell_type": "code",
"source": "for i in fetch_centromeres_cytoband('hg19').to_csv(index=False).split('\\n'):\n print (i)",
"execution_count": 5,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "chrom,start,end,mid\nchr1,121500000,128900000,125200000\nchr10,38000000,42300000,40150000\nchr11,51600000,55700000,53650000\nchr12,33300000,38200000,35750000\nchr13,16300000,19500000,17900000\nchr14,16100000,19100000,17600000\nchr15,15800000,20700000,18250000\nchr16,34600000,38600000,36600000\nchr17,22200000,25800000,24000000\nchr18,15400000,19000000,17200000\nchr19,24400000,28600000,26500000\nchr2,90500000,96800000,93650000\nchr20,25600000,29400000,27500000\nchr21,10900000,14300000,12600000\nchr22,12200000,17900000,15050000\nchr3,87900000,93900000,90900000\nchr4,48200000,52700000,50450000\nchr5,46100000,50700000,48400000\nchr6,58700000,63300000,61000000\nchr7,58000000,61700000,59850000\nchr8,43100000,48100000,45600000\nchr9,47300000,50700000,49000000\nchrX,58100000,63000000,60550000\nchrY,11600000,13400000,12500000\n\n"
}
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "# A user-oriented centromere fetching function"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2019-01-06T05:11:29.283486Z",
"start_time": "2019-01-06T05:11:29.274782Z"
},
"collapsed": true,
"trusted": true
},
"cell_type": "code",
"source": "import urllib\n\ndef check_connectivity(reference='http://www.google.com'):\n try:\n urllib.request.urlopen(reference, timeout=1)\n return True\n except urllib.request.URLError:\n return False\n \n\ndef fetch_centromeres(db, verbose=False):\n \n # the priority goes as \n # - Local\n # - centromeres.txt\n # - cytoBandIdeo\n # - cytoBand\n # - gap.txt\n \n if db in CENTROMERES:\n return CENTROMERES[db]\n \n if not check_connectivity(reference='http://www.google.com'):\n raise RuntimeError('No internet connection!')\n \n if not check_connectivity('http://hgdownload.cse.ucsc.edu'):\n raise RuntimeError('No connection to the genome database at hgdownload.cse.ucsc.edu!')\n \n try:\n cens = fetch_centromeres_centxt(db)\n if cens and len(cens) > 0:\n return cens\n except:\n pass\n \n try:\n cens = fetch_centromeres_cytoband(db, ideo=True)\n if cens and len(cens) > 0:\n return cens\n except:\n pass\n \n try:\n cens = fetch_centromeres_cytoband(db)\n if cens and len(cens) > 0:\n return cens\n except:\n pass\n \n try:\n cens = fetch_centromeres_gap()\n if cens and len(cens) > 0:\n return cens\n\n except:\n pass\n \n return None\n ",
"execution_count": 7,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2019-01-06T05:11:43.892440Z",
"start_time": "2019-01-06T05:11:43.014405Z"
},
"trusted": true,
"collapsed": true
},
"cell_type": "code",
"source": "fetch_centromeres('hg18')",
"execution_count": 10,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-24T16:09:53.087452Z",
"start_time": "2018-11-24T16:09:52.884916Z"
},
"trusted": true
},
"cell_type": "code",
"source": "fetch_centromeres_cytoband('hg18')",
"execution_count": 55,
"outputs": [
{
"ename": "ValueError",
"evalue": "No axis named start for object type <class 'pandas.core.frame.DataFrame'>",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-55-964fa65d664f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mfetch_centromeres_cytoband\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'hg18'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m<ipython-input-24-e17f613c7c3a>\u001b[0m in \u001b[0;36mfetch_centromeres_cytoband\u001b[0;34m(db, ideo)\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mfetch_centromeres_cytoband\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdb\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mideo\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 57\u001b[0m \u001b[0mcytoband_df\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfetch_cytoband\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdb\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mideo\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 58\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mcentromeres_from_cytoband\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcytoband_df\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m<ipython-input-24-e17f613c7c3a>\u001b[0m in \u001b[0;36mcentromeres_from_cytoband\u001b[0;34m(cytoband_df, merge)\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 51\u001b[0m cens = (cens[['chrom','start','end','mid']]\n\u001b[0;32m---> 52\u001b[0;31m \u001b[0;34m.\u001b[0m\u001b[0msort_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'chrom'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'start'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 53\u001b[0m .reset_index(drop=True))\n\u001b[1;32m 54\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcens\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/home/golobor/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36msort_values\u001b[0;34m(self, by, axis, ascending, inplace, kind, na_position)\u001b[0m\n\u001b[1;32m 4395\u001b[0m kind='quicksort', na_position='last'):\n\u001b[1;32m 4396\u001b[0m \u001b[0minplace\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvalidate_bool_kwarg\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minplace\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'inplace'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4397\u001b[0;31m \u001b[0maxis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_axis_number\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4398\u001b[0m \u001b[0mstacklevel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m2\u001b[0m \u001b[0;31m# Number of stack levels from df.sort_values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4399\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/home/golobor/miniconda3/lib/python3.6/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m_get_axis_number\u001b[0;34m(self, axis)\u001b[0m\n\u001b[1;32m 373\u001b[0m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 374\u001b[0m raise ValueError('No axis named {0} for object type {1}'\n\u001b[0;32m--> 375\u001b[0;31m .format(axis, type(self)))\n\u001b[0m\u001b[1;32m 376\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 377\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_get_axis_name\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: No axis named start for object type <class 'pandas.core.frame.DataFrame'>"
]
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-24T15:19:16.307843Z",
"start_time": "2018-11-24T15:19:15.455144Z"
},
"trusted": true
},
"cell_type": "code",
"source": "fetch_centromeres_gap('galGal5')",
"execution_count": 6,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>chrom</th>\n <th>start</th>\n <th>end</th>\n <th>mid</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>chr1</td>\n <td>74626886</td>\n <td>75126886</td>\n <td>74876886</td>\n </tr>\n <tr>\n <th>1</th>\n <td>chr10</td>\n <td>0</td>\n <td>500000</td>\n <td>250000</td>\n </tr>\n <tr>\n <th>2</th>\n <td>chr11</td>\n <td>2804944</td>\n <td>3304944</td>\n <td>3054944</td>\n </tr>\n <tr>\n <th>3</th>\n <td>chr13</td>\n <td>0</td>\n <td>500000</td>\n <td>250000</td>\n </tr>\n <tr>\n <th>4</th>\n <td>chr14</td>\n <td>15034742</td>\n <td>15534742</td>\n <td>15284742</td>\n </tr>\n <tr>\n <th>5</th>\n <td>chr17</td>\n <td>0</td>\n <td>500000</td>\n <td>250000</td>\n </tr>\n <tr>\n <th>6</th>\n <td>chr2</td>\n <td>52321884</td>\n <td>52821884</td>\n <td>52571884</td>\n </tr>\n <tr>\n <th>7</th>\n <td>chr22</td>\n <td>3007278</td>\n <td>3507278</td>\n <td>3257278</td>\n </tr>\n <tr>\n <th>8</th>\n <td>chr25</td>\n <td>277761</td>\n <td>777761</td>\n <td>527761</td>\n </tr>\n <tr>\n <th>9</th>\n <td>chr3</td>\n <td>11717420</td>\n <td>12217420</td>\n <td>11967420</td>\n </tr>\n <tr>\n <th>10</th>\n <td>chr4</td>\n <td>18856549</td>\n <td>19356549</td>\n <td>19106549</td>\n </tr>\n <tr>\n <th>11</th>\n <td>chr5</td>\n <td>5808100</td>\n <td>6308100</td>\n <td>6058100</td>\n </tr>\n <tr>\n <th>12</th>\n <td>chr6</td>\n <td>0</td>\n <td>500000</td>\n <td>250000</td>\n </tr>\n <tr>\n <th>13</th>\n <td>chr7</td>\n <td>7340970</td>\n <td>7840970</td>\n <td>7590970</td>\n </tr>\n <tr>\n <th>14</th>\n <td>chr8</td>\n <td>10505700</td>\n <td>11005700</td>\n <td>10755700</td>\n </tr>\n <tr>\n <th>15</th>\n <td>chr9</td>\n <td>0</td>\n <td>500000</td>\n <td>250000</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " chrom start end mid\n0 chr1 74626886 75126886 74876886\n1 chr10 0 500000 250000\n2 chr11 2804944 3304944 3054944\n3 chr13 0 500000 250000\n4 chr14 15034742 15534742 15284742\n5 chr17 0 500000 250000\n6 chr2 52321884 52821884 52571884\n7 chr22 3007278 3507278 3257278\n8 chr25 277761 777761 527761\n9 chr3 11717420 12217420 11967420\n10 chr4 18856549 19356549 19106549\n11 chr5 5808100 6308100 6058100\n12 chr6 0 500000 250000\n13 chr7 7340970 7840970 7590970\n14 chr8 10505700 11005700 10755700\n15 chr9 0 500000 250000"
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-22T09:48:17.800492Z",
"start_time": "2018-11-22T09:48:17.600558Z"
},
"trusted": true
},
"cell_type": "code",
"source": "fetch_centromeres_cytoband('hg19') ",
"execution_count": 77,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>chrom</th>\n <th>start</th>\n <th>end</th>\n <th>mid</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>chr1</td>\n <td>121500000</td>\n <td>128900000</td>\n <td>125200000</td>\n </tr>\n <tr>\n <th>1</th>\n <td>chr10</td>\n <td>38000000</td>\n <td>42300000</td>\n <td>40150000</td>\n </tr>\n <tr>\n <th>2</th>\n <td>chr11</td>\n <td>51600000</td>\n <td>55700000</td>\n <td>53650000</td>\n </tr>\n <tr>\n <th>3</th>\n <td>chr12</td>\n <td>33300000</td>\n <td>38200000</td>\n <td>35750000</td>\n </tr>\n <tr>\n <th>4</th>\n <td>chr13</td>\n <td>16300000</td>\n <td>19500000</td>\n <td>17900000</td>\n </tr>\n <tr>\n <th>5</th>\n <td>chr14</td>\n <td>16100000</td>\n <td>19100000</td>\n <td>17600000</td>\n </tr>\n <tr>\n <th>6</th>\n <td>chr15</td>\n <td>15800000</td>\n <td>20700000</td>\n <td>18250000</td>\n </tr>\n <tr>\n <th>7</th>\n <td>chr16</td>\n <td>34600000</td>\n <td>38600000</td>\n <td>36600000</td>\n </tr>\n <tr>\n <th>8</th>\n <td>chr17</td>\n <td>22200000</td>\n <td>25800000</td>\n <td>24000000</td>\n </tr>\n <tr>\n <th>9</th>\n <td>chr18</td>\n <td>15400000</td>\n <td>19000000</td>\n <td>17200000</td>\n </tr>\n <tr>\n <th>10</th>\n <td>chr19</td>\n <td>24400000</td>\n <td>28600000</td>\n <td>26500000</td>\n </tr>\n <tr>\n <th>11</th>\n <td>chr2</td>\n <td>90500000</td>\n <td>96800000</td>\n <td>93650000</td>\n </tr>\n <tr>\n <th>12</th>\n <td>chr20</td>\n <td>25600000</td>\n <td>29400000</td>\n <td>27500000</td>\n </tr>\n <tr>\n <th>13</th>\n <td>chr21</td>\n <td>10900000</td>\n <td>14300000</td>\n <td>12600000</td>\n </tr>\n <tr>\n <th>14</th>\n <td>chr22</td>\n <td>12200000</td>\n <td>17900000</td>\n <td>15050000</td>\n </tr>\n <tr>\n <th>15</th>\n <td>chr3</td>\n <td>87900000</td>\n <td>93900000</td>\n <td>90900000</td>\n </tr>\n <tr>\n <th>16</th>\n <td>chr4</td>\n <td>48200000</td>\n <td>52700000</td>\n <td>50450000</td>\n </tr>\n <tr>\n <th>17</th>\n <td>chr5</td>\n <td>46100000</td>\n <td>50700000</td>\n <td>48400000</td>\n </tr>\n <tr>\n <th>18</th>\n <td>chr6</td>\n <td>58700000</td>\n <td>63300000</td>\n <td>61000000</td>\n </tr>\n <tr>\n <th>19</th>\n <td>chr7</td>\n <td>58000000</td>\n <td>61700000</td>\n <td>59850000</td>\n </tr>\n <tr>\n <th>20</th>\n <td>chr8</td>\n <td>43100000</td>\n <td>48100000</td>\n <td>45600000</td>\n </tr>\n <tr>\n <th>21</th>\n <td>chr9</td>\n <td>47300000</td>\n <td>50700000</td>\n <td>49000000</td>\n </tr>\n <tr>\n <th>22</th>\n <td>chrX</td>\n <td>58100000</td>\n <td>63000000</td>\n <td>60550000</td>\n </tr>\n <tr>\n <th>23</th>\n <td>chrY</td>\n <td>11600000</td>\n <td>13400000</td>\n <td>12500000</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " chrom start end mid\n0 chr1 121500000 128900000 125200000\n1 chr10 38000000 42300000 40150000\n2 chr11 51600000 55700000 53650000\n3 chr12 33300000 38200000 35750000\n4 chr13 16300000 19500000 17900000\n5 chr14 16100000 19100000 17600000\n6 chr15 15800000 20700000 18250000\n7 chr16 34600000 38600000 36600000\n8 chr17 22200000 25800000 24000000\n9 chr18 15400000 19000000 17200000\n10 chr19 24400000 28600000 26500000\n11 chr2 90500000 96800000 93650000\n12 chr20 25600000 29400000 27500000\n13 chr21 10900000 14300000 12600000\n14 chr22 12200000 17900000 15050000\n15 chr3 87900000 93900000 90900000\n16 chr4 48200000 52700000 50450000\n17 chr5 46100000 50700000 48400000\n18 chr6 58700000 63300000 61000000\n19 chr7 58000000 61700000 59850000\n20 chr8 43100000 48100000 45600000\n21 chr9 47300000 50700000 49000000\n22 chrX 58100000 63000000 60550000\n23 chrY 11600000 13400000 12500000"
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-24T15:19:26.639500Z",
"start_time": "2018-11-24T15:19:26.443603Z"
},
"trusted": true
},
"cell_type": "code",
"source": "fetch_centromeres_gap('hg19')",
"execution_count": 9,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>chrom</th>\n <th>start</th>\n <th>end</th>\n <th>mid</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>chr1</td>\n <td>121535434</td>\n <td>124535434</td>\n <td>123035434</td>\n </tr>\n <tr>\n <th>1</th>\n <td>chr10</td>\n <td>39254935</td>\n <td>42254935</td>\n <td>40754935</td>\n </tr>\n <tr>\n <th>2</th>\n <td>chr11</td>\n <td>51644205</td>\n <td>54644205</td>\n <td>53144205</td>\n </tr>\n <tr>\n <th>3</th>\n <td>chr12</td>\n <td>34856694</td>\n <td>37856694</td>\n <td>36356694</td>\n </tr>\n <tr>\n <th>4</th>\n <td>chr13</td>\n <td>16000000</td>\n <td>19000000</td>\n <td>17500000</td>\n </tr>\n <tr>\n <th>5</th>\n <td>chr14</td>\n <td>16000000</td>\n <td>19000000</td>\n <td>17500000</td>\n </tr>\n <tr>\n <th>6</th>\n <td>chr15</td>\n <td>17000000</td>\n <td>20000000</td>\n <td>18500000</td>\n </tr>\n <tr>\n <th>7</th>\n <td>chr16</td>\n <td>35335801</td>\n <td>38335801</td>\n <td>36835801</td>\n </tr>\n <tr>\n <th>8</th>\n <td>chr17</td>\n <td>22263006</td>\n <td>25263006</td>\n <td>23763006</td>\n </tr>\n <tr>\n <th>9</th>\n <td>chr18</td>\n <td>15460898</td>\n <td>18460898</td>\n <td>16960898</td>\n </tr>\n <tr>\n <th>10</th>\n <td>chr19</td>\n <td>24681782</td>\n <td>27681782</td>\n <td>26181782</td>\n </tr>\n <tr>\n <th>11</th>\n <td>chr2</td>\n <td>92326171</td>\n <td>95326171</td>\n <td>93826171</td>\n </tr>\n <tr>\n <th>12</th>\n <td>chr20</td>\n <td>26369569</td>\n <td>29369569</td>\n <td>27869569</td>\n </tr>\n <tr>\n <th>13</th>\n <td>chr21</td>\n <td>11288129</td>\n <td>14288129</td>\n <td>12788129</td>\n </tr>\n <tr>\n <th>14</th>\n <td>chr22</td>\n <td>13000000</td>\n <td>16000000</td>\n <td>14500000</td>\n </tr>\n <tr>\n <th>15</th>\n <td>chr3</td>\n <td>90504854</td>\n <td>93504854</td>\n <td>92004854</td>\n </tr>\n <tr>\n <th>16</th>\n <td>chr4</td>\n <td>49660117</td>\n <td>52660117</td>\n <td>51160117</td>\n </tr>\n <tr>\n <th>17</th>\n <td>chr5</td>\n <td>46405641</td>\n <td>49405641</td>\n <td>47905641</td>\n </tr>\n <tr>\n <th>18</th>\n <td>chr6</td>\n <td>58830166</td>\n <td>61830166</td>\n <td>60330166</td>\n </tr>\n <tr>\n <th>19</th>\n <td>chr7</td>\n <td>58054331</td>\n <td>61054331</td>\n <td>59554331</td>\n </tr>\n <tr>\n <th>20</th>\n <td>chr8</td>\n <td>43838887</td>\n <td>46838887</td>\n <td>45338887</td>\n </tr>\n <tr>\n <th>21</th>\n <td>chr9</td>\n <td>47367679</td>\n <td>50367679</td>\n <td>48867679</td>\n </tr>\n <tr>\n <th>22</th>\n <td>chrX</td>\n <td>58632012</td>\n <td>61632012</td>\n <td>60132012</td>\n </tr>\n <tr>\n <th>23</th>\n <td>chrY</td>\n <td>10104553</td>\n <td>13104553</td>\n <td>11604553</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " chrom start end mid\n0 chr1 121535434 124535434 123035434\n1 chr10 39254935 42254935 40754935\n2 chr11 51644205 54644205 53144205\n3 chr12 34856694 37856694 36356694\n4 chr13 16000000 19000000 17500000\n5 chr14 16000000 19000000 17500000\n6 chr15 17000000 20000000 18500000\n7 chr16 35335801 38335801 36835801\n8 chr17 22263006 25263006 23763006\n9 chr18 15460898 18460898 16960898\n10 chr19 24681782 27681782 26181782\n11 chr2 92326171 95326171 93826171\n12 chr20 26369569 29369569 27869569\n13 chr21 11288129 14288129 12788129\n14 chr22 13000000 16000000 14500000\n15 chr3 90504854 93504854 92004854\n16 chr4 49660117 52660117 51160117\n17 chr5 46405641 49405641 47905641\n18 chr6 58830166 61830166 60330166\n19 chr7 58054331 61054331 59554331\n20 chr8 43838887 46838887 45338887\n21 chr9 47367679 50367679 48867679\n22 chrX 58632012 61632012 60132012\n23 chrY 10104553 13104553 11604553"
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-22T09:51:31.392784Z",
"start_time": "2018-11-22T09:51:31.184897Z"
},
"trusted": true
},
"cell_type": "code",
"source": "fetch_centromeres_gap('panTro6')",
"execution_count": 83,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>chrom</th>\n <th>start</th>\n <th>end</th>\n <th>mid</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>chrY</td>\n <td>7552958</td>\n <td>8052959</td>\n <td>7802958</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " chrom start end mid\n0 chrY 7552958 8052959 7802958"
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-22T09:16:48.026791Z",
"start_time": "2018-11-22T09:16:47.462041Z"
},
"trusted": true
},
"cell_type": "code",
"source": "fetch_centromeres_cytoband('galGal5')",
"execution_count": 63,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>chrom</th>\n <th>start</th>\n <th>end</th>\n <th>mid</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>chr1</td>\n <td>74626886</td>\n <td>75126886</td>\n <td>74876886</td>\n </tr>\n <tr>\n <th>1</th>\n <td>chr10</td>\n <td>0</td>\n <td>500000</td>\n <td>250000</td>\n </tr>\n <tr>\n <th>2</th>\n <td>chr11</td>\n <td>2804944</td>\n <td>3304944</td>\n <td>3054944</td>\n </tr>\n <tr>\n <th>3</th>\n <td>chr13</td>\n <td>0</td>\n <td>500000</td>\n <td>250000</td>\n </tr>\n <tr>\n <th>4</th>\n <td>chr14</td>\n <td>15034742</td>\n <td>15534742</td>\n <td>15284742</td>\n </tr>\n <tr>\n <th>5</th>\n <td>chr17</td>\n <td>0</td>\n <td>500000</td>\n <td>250000</td>\n </tr>\n <tr>\n <th>6</th>\n <td>chr2</td>\n <td>52321884</td>\n <td>52821884</td>\n <td>52571884</td>\n </tr>\n <tr>\n <th>7</th>\n <td>chr22</td>\n <td>3007278</td>\n <td>3507278</td>\n <td>3257278</td>\n </tr>\n <tr>\n <th>8</th>\n <td>chr25</td>\n <td>277761</td>\n <td>777761</td>\n <td>527761</td>\n </tr>\n <tr>\n <th>9</th>\n <td>chr3</td>\n <td>11717420</td>\n <td>12217420</td>\n <td>11967420</td>\n </tr>\n <tr>\n <th>10</th>\n <td>chr4</td>\n <td>18856549</td>\n <td>19356549</td>\n <td>19106549</td>\n </tr>\n <tr>\n <th>11</th>\n <td>chr5</td>\n <td>5808100</td>\n <td>6308100</td>\n <td>6058100</td>\n </tr>\n <tr>\n <th>12</th>\n <td>chr6</td>\n <td>0</td>\n <td>500000</td>\n <td>250000</td>\n </tr>\n <tr>\n <th>13</th>\n <td>chr7</td>\n <td>7340970</td>\n <td>7840970</td>\n <td>7590970</td>\n </tr>\n <tr>\n <th>14</th>\n <td>chr8</td>\n <td>10505700</td>\n <td>11005700</td>\n <td>10755700</td>\n </tr>\n <tr>\n <th>15</th>\n <td>chr9</td>\n <td>0</td>\n <td>500000</td>\n <td>250000</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " chrom start end mid\n0 chr1 74626886 75126886 74876886\n1 chr10 0 500000 250000\n2 chr11 2804944 3304944 3054944\n3 chr13 0 500000 250000\n4 chr14 15034742 15534742 15284742\n5 chr17 0 500000 250000\n6 chr2 52321884 52821884 52571884\n7 chr22 3007278 3507278 3257278\n8 chr25 277761 777761 527761\n9 chr3 11717420 12217420 11967420\n10 chr4 18856549 19356549 19106549\n11 chr5 5808100 6308100 6058100\n12 chr6 0 500000 250000\n13 chr7 7340970 7840970 7590970\n14 chr8 10505700 11005700 10755700\n15 chr9 0 500000 250000"
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-22T09:50:57.881496Z",
"start_time": "2018-11-22T09:50:57.666897Z"
},
"trusted": true
},
"cell_type": "code",
"source": "fetch_centromeres_cytoband('galGal6')",
"execution_count": 81,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>chrom</th>\n <th>start</th>\n <th>end</th>\n <th>mid</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>chr1</td>\n <td>76608038</td>\n <td>77108038</td>\n <td>76858038</td>\n </tr>\n <tr>\n <th>1</th>\n <td>chr10</td>\n <td>0</td>\n <td>500000</td>\n <td>250000</td>\n </tr>\n <tr>\n <th>2</th>\n <td>chr11</td>\n <td>2730744</td>\n <td>3230744</td>\n <td>2980744</td>\n </tr>\n <tr>\n <th>3</th>\n <td>chr13</td>\n <td>0</td>\n <td>500000</td>\n <td>250000</td>\n </tr>\n <tr>\n <th>4</th>\n <td>chr14</td>\n <td>15478998</td>\n <td>15978998</td>\n <td>15728998</td>\n </tr>\n <tr>\n <th>5</th>\n <td>chr16</td>\n <td>0</td>\n <td>500000</td>\n <td>250000</td>\n </tr>\n <tr>\n <th>6</th>\n <td>chr17</td>\n <td>0</td>\n <td>500000</td>\n <td>250000</td>\n </tr>\n <tr>\n <th>7</th>\n <td>chr2</td>\n <td>52151238</td>\n <td>52651238</td>\n <td>52401238</td>\n </tr>\n <tr>\n <th>8</th>\n <td>chr22</td>\n <td>3070996</td>\n <td>3570996</td>\n <td>3320996</td>\n </tr>\n <tr>\n <th>9</th>\n <td>chr25</td>\n <td>740762</td>\n <td>1240762</td>\n <td>990762</td>\n </tr>\n <tr>\n <th>10</th>\n <td>chr26</td>\n <td>5555710</td>\n <td>6055710</td>\n <td>5805710</td>\n </tr>\n <tr>\n <th>11</th>\n <td>chr3</td>\n <td>5801976</td>\n <td>6301976</td>\n <td>6051976</td>\n </tr>\n <tr>\n <th>12</th>\n <td>chr4</td>\n <td>18761804</td>\n <td>19261804</td>\n <td>19011804</td>\n </tr>\n <tr>\n <th>13</th>\n <td>chr5</td>\n <td>6009601</td>\n <td>6509601</td>\n <td>6259601</td>\n </tr>\n <tr>\n <th>14</th>\n <td>chr6</td>\n <td>0</td>\n <td>500000</td>\n <td>250000</td>\n </tr>\n <tr>\n <th>15</th>\n <td>chr7</td>\n <td>7331533</td>\n <td>7831533</td>\n <td>7581533</td>\n </tr>\n <tr>\n <th>16</th>\n <td>chr8</td>\n <td>9326743</td>\n <td>9826743</td>\n <td>9576743</td>\n </tr>\n <tr>\n <th>17</th>\n <td>chr9</td>\n <td>0</td>\n <td>500000</td>\n <td>250000</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " chrom start end mid\n0 chr1 76608038 77108038 76858038\n1 chr10 0 500000 250000\n2 chr11 2730744 3230744 2980744\n3 chr13 0 500000 250000\n4 chr14 15478998 15978998 15728998\n5 chr16 0 500000 250000\n6 chr17 0 500000 250000\n7 chr2 52151238 52651238 52401238\n8 chr22 3070996 3570996 3320996\n9 chr25 740762 1240762 990762\n10 chr26 5555710 6055710 5805710\n11 chr3 5801976 6301976 6051976\n12 chr4 18761804 19261804 19011804\n13 chr5 6009601 6509601 6259601\n14 chr6 0 500000 250000\n15 chr7 7331533 7831533 7581533\n16 chr8 9326743 9826743 9576743\n17 chr9 0 500000 250000"
},
"execution_count": 81,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-22T09:52:38.359138Z",
"start_time": "2018-11-22T09:52:37.076130Z"
},
"trusted": true
},
"cell_type": "code",
"source": "fetch_centromeres_gap('danRer11')",
"execution_count": 84,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>chrom</th>\n <th>start</th>\n <th>end</th>\n <th>mid</th>\n </tr>\n </thead>\n <tbody>\n </tbody>\n</table>\n</div>",
"text/plain": "Empty DataFrame\nColumns: [chrom, start, end, mid]\nIndex: []"
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-22T09:52:49.723721Z",
"start_time": "2018-11-22T09:52:49.511937Z"
},
"trusted": true
},
"cell_type": "code",
"source": "fetch_centromeres_cytoband('danRer11')",
"execution_count": 85,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>chrom</th>\n <th>start</th>\n <th>end</th>\n <th>mid</th>\n </tr>\n </thead>\n <tbody>\n </tbody>\n</table>\n</div>",
"text/plain": "Empty DataFrame\nColumns: [chrom, start, end, mid]\nIndex: []"
},
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-22T09:30:54.201460Z",
"start_time": "2018-11-22T09:30:53.988732Z"
},
"trusted": true
},
"cell_type": "code",
"source": "fetch_centromeres_cytoband('mm10') ",
"execution_count": 69,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>chrom</th>\n <th>start</th>\n <th>end</th>\n <th>mid</th>\n </tr>\n </thead>\n <tbody>\n </tbody>\n</table>\n</div>",
"text/plain": "Empty DataFrame\nColumns: [chrom, start, end, mid]\nIndex: []"
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-22T09:32:26.741263Z",
"start_time": "2018-11-22T09:32:26.555111Z"
},
"trusted": true
},
"cell_type": "code",
"source": "cb=fetch_cytoband('mm10',ideo=False)\ncb.gieStain.unique()",
"execution_count": 73,
"outputs": [
{
"data": {
"text/plain": "array(['gpos100', 'gneg', 'gpos33', 'gpos66', 'gpos75'], dtype=object)"
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2019-01-06T05:20:25.348714Z",
"start_time": "2019-01-06T05:20:25.147241Z"
},
"trusted": true
},
"cell_type": "code",
"source": "fetch_centromeres_gap('mm10')",
"execution_count": 11,
"outputs": [
{
"data": {
"text/html": "<div>\n<style>\n .dataframe thead tr:only-child th {\n text-align: right;\n }\n\n .dataframe thead th {\n text-align: left;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>chrom</th>\n <th>start</th>\n <th>end</th>\n <th>mid</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>chr1</td>\n <td>110000</td>\n <td>3000000</td>\n <td>1555000</td>\n </tr>\n <tr>\n <th>1</th>\n <td>chr10</td>\n <td>110000</td>\n <td>3000000</td>\n <td>1555000</td>\n </tr>\n <tr>\n <th>2</th>\n <td>chr11</td>\n <td>110000</td>\n <td>3000000</td>\n <td>1555000</td>\n </tr>\n <tr>\n <th>3</th>\n <td>chr12</td>\n <td>110000</td>\n <td>3000000</td>\n <td>1555000</td>\n </tr>\n <tr>\n <th>4</th>\n <td>chr13</td>\n <td>110000</td>\n <td>3000000</td>\n <td>1555000</td>\n </tr>\n <tr>\n <th>5</th>\n <td>chr14</td>\n <td>110000</td>\n <td>3000000</td>\n <td>1555000</td>\n </tr>\n <tr>\n <th>6</th>\n <td>chr15</td>\n <td>110000</td>\n <td>3000000</td>\n <td>1555000</td>\n </tr>\n <tr>\n <th>7</th>\n <td>chr16</td>\n <td>110000</td>\n <td>3000000</td>\n <td>1555000</td>\n </tr>\n <tr>\n <th>8</th>\n <td>chr17</td>\n <td>110000</td>\n <td>3000000</td>\n <td>1555000</td>\n </tr>\n <tr>\n <th>9</th>\n <td>chr18</td>\n <td>110000</td>\n <td>3000000</td>\n <td>1555000</td>\n </tr>\n <tr>\n <th>10</th>\n <td>chr19</td>\n <td>110000</td>\n <td>3000000</td>\n <td>1555000</td>\n </tr>\n <tr>\n <th>11</th>\n <td>chr2</td>\n <td>110000</td>\n <td>3000000</td>\n <td>1555000</td>\n </tr>\n <tr>\n <th>12</th>\n <td>chr3</td>\n <td>110000</td>\n <td>3000000</td>\n <td>1555000</td>\n </tr>\n <tr>\n <th>13</th>\n <td>chr4</td>\n <td>110000</td>\n <td>3000000</td>\n <td>1555000</td>\n </tr>\n <tr>\n <th>14</th>\n <td>chr5</td>\n <td>110000</td>\n <td>3000000</td>\n <td>1555000</td>\n </tr>\n <tr>\n <th>15</th>\n <td>chr6</td>\n <td>110000</td>\n <td>3000000</td>\n <td>1555000</td>\n </tr>\n <tr>\n <th>16</th>\n <td>chr7</td>\n <td>110000</td>\n <td>3000000</td>\n <td>1555000</td>\n </tr>\n <tr>\n <th>17</th>\n <td>chr8</td>\n <td>110000</td>\n <td>3000000</td>\n <td>1555000</td>\n </tr>\n <tr>\n <th>18</th>\n <td>chr9</td>\n <td>110000</td>\n <td>3000000</td>\n <td>1555000</td>\n </tr>\n <tr>\n <th>19</th>\n <td>chrX</td>\n <td>110000</td>\n <td>3000000</td>\n <td>1555000</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " chrom start end mid\n0 chr1 110000 3000000 1555000\n1 chr10 110000 3000000 1555000\n2 chr11 110000 3000000 1555000\n3 chr12 110000 3000000 1555000\n4 chr13 110000 3000000 1555000\n5 chr14 110000 3000000 1555000\n6 chr15 110000 3000000 1555000\n7 chr16 110000 3000000 1555000\n8 chr17 110000 3000000 1555000\n9 chr18 110000 3000000 1555000\n10 chr19 110000 3000000 1555000\n11 chr2 110000 3000000 1555000\n12 chr3 110000 3000000 1555000\n13 chr4 110000 3000000 1555000\n14 chr5 110000 3000000 1555000\n15 chr6 110000 3000000 1555000\n16 chr7 110000 3000000 1555000\n17 chr8 110000 3000000 1555000\n18 chr9 110000 3000000 1555000\n19 chrX 110000 3000000 1555000"
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-22T09:31:02.427049Z",
"start_time": "2018-11-22T09:31:02.230466Z"
},
"trusted": true
},
"cell_type": "code",
"source": "fetch_centromeres_cytoband('mm9') ",
"execution_count": 70,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>chrom</th>\n <th>start</th>\n <th>end</th>\n <th>mid</th>\n </tr>\n </thead>\n <tbody>\n </tbody>\n</table>\n</div>",
"text/plain": "Empty DataFrame\nColumns: [chrom, start, end, mid]\nIndex: []"
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-22T09:18:48.133028Z",
"start_time": "2018-11-22T09:18:47.931880Z"
},
"trusted": true
},
"cell_type": "code",
"source": "fetch_centromeres_cytoband('sacCer3') ",
"execution_count": 66,
"outputs": [
{
"ename": "HTTPError",
"evalue": "HTTP Error 404: Not Found",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mHTTPError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-66-2aa7e2403778>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mfetch_centromeres_cytoband\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'sacCer3'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m<ipython-input-61-193fd0553319>\u001b[0m in \u001b[0;36mfetch_centromeres_cytoband\u001b[0;34m(db, ideo)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mfetch_centromeres_cytoband\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdb\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mideo\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mcytoband\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfetch_cytoband\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdb\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mideo\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mcens\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcytoband\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcytoband\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgieStain\u001b[0m\u001b[0;34m==\u001b[0m\u001b[0;34m'acen'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-60-f0d5fa5cd4a3>\u001b[0m in \u001b[0;36mfetch_cytoband\u001b[0;34m(db, ideo, **kwargs)\u001b[0m\n\u001b[1;32m 20\u001b[0m return read_cytoband(\n\u001b[1;32m 21\u001b[0m \u001b[0;34m'http://hgdownload.cse.ucsc.edu/goldenPath/{}/database/cytoBandIdeo.txt.gz'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m **kwargs)\n\u001b[0m\u001b[1;32m 23\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m return read_cytoband(\n",
"\u001b[0;32m<ipython-input-60-f0d5fa5cd4a3>\u001b[0m in \u001b[0;36mread_cytoband\u001b[0;34m(filepath_or_fp, chroms, **kwargs)\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0msep\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'\\t'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0mnames\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mCYTOBAND_FIELDS\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 13\u001b[0;31m **kwargs)\n\u001b[0m\u001b[1;32m 14\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mchroms\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0mcytoband\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcytoband\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcytoband\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mchrom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mchroms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/home/golobor/miniconda3/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36mparser_f\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, doublequote, delim_whitespace, low_memory, memory_map, float_precision)\u001b[0m\n\u001b[1;32m 676\u001b[0m skip_blank_lines=skip_blank_lines)\n\u001b[1;32m 677\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 678\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 679\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 680\u001b[0m \u001b[0mparser_f\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/home/golobor/miniconda3/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 422\u001b[0m \u001b[0mcompression\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_infer_compression\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcompression\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 423\u001b[0m filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(\n\u001b[0;32m--> 424\u001b[0;31m filepath_or_buffer, encoding, compression)\n\u001b[0m\u001b[1;32m 425\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'compression'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcompression\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 426\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/home/golobor/miniconda3/lib/python3.6/site-packages/pandas/io/common.py\u001b[0m in \u001b[0;36mget_filepath_or_buffer\u001b[0;34m(filepath_or_buffer, encoding, compression, mode)\u001b[0m\n\u001b[1;32m 193\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 194\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_url\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 195\u001b[0;31m \u001b[0mreq\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_urlopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 196\u001b[0m \u001b[0mcontent_encoding\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreq\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mheaders\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Content-Encoding'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 197\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcontent_encoding\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'gzip'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/home/golobor/miniconda3/lib/python3.6/urllib/request.py\u001b[0m in \u001b[0;36murlopen\u001b[0;34m(url, data, timeout, cafile, capath, cadefault, context)\u001b[0m\n\u001b[1;32m 221\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 222\u001b[0m \u001b[0mopener\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_opener\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 223\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mopener\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 224\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 225\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0minstall_opener\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopener\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/home/golobor/miniconda3/lib/python3.6/urllib/request.py\u001b[0m in \u001b[0;36mopen\u001b[0;34m(self, fullurl, data, timeout)\u001b[0m\n\u001b[1;32m 530\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mprocessor\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprocess_response\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprotocol\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 531\u001b[0m \u001b[0mmeth\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprocessor\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmeth_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 532\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmeth\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreq\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 533\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 534\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/home/golobor/miniconda3/lib/python3.6/urllib/request.py\u001b[0m in \u001b[0;36mhttp_response\u001b[0;34m(self, request, response)\u001b[0m\n\u001b[1;32m 640\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m200\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0mcode\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m300\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 641\u001b[0m response = self.parent.error(\n\u001b[0;32m--> 642\u001b[0;31m 'http', request, response, code, msg, hdrs)\n\u001b[0m\u001b[1;32m 643\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 644\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/home/golobor/miniconda3/lib/python3.6/urllib/request.py\u001b[0m in \u001b[0;36merror\u001b[0;34m(self, proto, *args)\u001b[0m\n\u001b[1;32m 568\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhttp_err\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 569\u001b[0m \u001b[0margs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'default'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'http_error_default'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0morig_args\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 570\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call_chain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 571\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 572\u001b[0m \u001b[0;31m# XXX probably also want an abstract factory that knows when it makes\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/home/golobor/miniconda3/lib/python3.6/urllib/request.py\u001b[0m in \u001b[0;36m_call_chain\u001b[0;34m(self, chain, kind, meth_name, *args)\u001b[0m\n\u001b[1;32m 502\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mhandler\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mhandlers\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 503\u001b[0m \u001b[0mfunc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhandler\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmeth_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 504\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 505\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 506\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/home/golobor/miniconda3/lib/python3.6/urllib/request.py\u001b[0m in \u001b[0;36mhttp_error_default\u001b[0;34m(self, req, fp, code, msg, hdrs)\u001b[0m\n\u001b[1;32m 648\u001b[0m \u001b[0;32mclass\u001b[0m \u001b[0mHTTPDefaultErrorHandler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mBaseHandler\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 649\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mhttp_error_default\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreq\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmsg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhdrs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 650\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mHTTPError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreq\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfull_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmsg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhdrs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 651\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 652\u001b[0m \u001b[0;32mclass\u001b[0m \u001b[0mHTTPRedirectHandler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mBaseHandler\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mHTTPError\u001b[0m: HTTP Error 404: Not Found"
]
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-22T09:18:58.594076Z",
"start_time": "2018-11-22T09:18:58.387458Z"
},
"trusted": true
},
"cell_type": "code",
"source": "fetch_centromeres_gap('sacCer3') ",
"execution_count": 67,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>chrom</th>\n <th>start</th>\n <th>end</th>\n <th>mid</th>\n </tr>\n </thead>\n <tbody>\n </tbody>\n</table>\n</div>",
"text/plain": "Empty DataFrame\nColumns: [chrom, start, end, mid]\nIndex: []"
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-22T09:28:47.583595Z",
"start_time": "2018-11-22T09:28:47.379947Z"
},
"trusted": true
},
"cell_type": "code",
"source": "# c elegans are holocentric: https://en.wikipedia.org/wiki/Centromere\nfetch_centromeres_cytoband('ce11') \n",
"execution_count": 68,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>chrom</th>\n <th>start</th>\n <th>end</th>\n <th>mid</th>\n </tr>\n </thead>\n <tbody>\n </tbody>\n</table>\n</div>",
"text/plain": "Empty DataFrame\nColumns: [chrom, start, end, mid]\nIndex: []"
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-22T09:50:17.157014Z",
"start_time": "2018-11-22T09:50:14.559547Z"
},
"trusted": true
},
"cell_type": "code",
"source": "fetch_centromeres_gap('xenLae2') \n",
"execution_count": 80,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>chrom</th>\n <th>start</th>\n <th>end</th>\n <th>mid</th>\n </tr>\n </thead>\n <tbody>\n </tbody>\n</table>\n</div>",
"text/plain": "Empty DataFrame\nColumns: [chrom, start, end, mid]\nIndex: []"
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
]
}
],
"metadata": {
"_draft": {
"nbviewer_url": "https://gist.github.com/2d0dbeb331899aa1bb68912c751f2f4f"
},
"gist": {
"id": "2d0dbeb331899aa1bb68912c751f2f4f",
"data": {
"description": "centromere_downloaded",
"public": true
}
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.6.8",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"nav_menu": {},
"toc": {
"threshold": 6,
"number_sections": true,
"toc_cell": true,
"toc_window_display": true,
"toc_section_display": "block",
"sideBar": true,
"navigate_menu": true
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment