Skip to content

Instantly share code, notes, and snippets.

@nvictus
Created August 23, 2023 14:10
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nvictus/6acd49d53001feed268cfe3193a017b0 to your computer and use it in GitHub Desktop.
Save nvictus/6acd49d53001feed268cfe3193a017b0 to your computer and use it in GitHub Desktop.
File-likes in pyoxbow
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import oxbow as ox\n",
"import polars as pl\n",
"import smart_open"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## File-like on HTSlib"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><style>\n",
".dataframe > thead > tr > th,\n",
".dataframe > tbody > tr > td {\n",
" text-align: right;\n",
"}\n",
"</style>\n",
"<small>shape: (62_042, 9)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>chrom</th><th>pos</th><th>id</th><th>ref</th><th>alt</th><th>qual</th><th>filter</th><th>info</th><th>format</th></tr><tr><td>cat</td><td>i32</td><td>str</td><td>str</td><td>str</td><td>f32</td><td>str</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>&quot;Y&quot;</td><td>2655180</td><td>&quot;rs11575897&quot;</td><td>&quot;G&quot;</td><td>&quot;A&quot;</td><td>100.0</td><td>&quot;PASS&quot;</td><td>&quot;AA=G;AC=22;AF=…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>2655471</td><td>&quot;&quot;</td><td>&quot;A&quot;</td><td>&quot;C&quot;</td><td>100.0</td><td>&quot;PASS&quot;</td><td>&quot;AA=A;AC=5;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>2655754</td><td>&quot;&quot;</td><td>&quot;A&quot;</td><td>&quot;T&quot;</td><td>100.0</td><td>&quot;PASS&quot;</td><td>&quot;AA=A;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>2655800</td><td>&quot;&quot;</td><td>&quot;A&quot;</td><td>&quot;G&quot;</td><td>100.0</td><td>&quot;PASS&quot;</td><td>&quot;AA=A;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>2655989</td><td>&quot;&quot;</td><td>&quot;A&quot;</td><td>&quot;G&quot;</td><td>100.0</td><td>&quot;PASS&quot;</td><td>&quot;AA=A;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>2655994</td><td>&quot;&quot;</td><td>&quot;C&quot;</td><td>&quot;G&quot;</td><td>100.0</td><td>&quot;PASS&quot;</td><td>&quot;AA=C;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>2656126</td><td>&quot;&quot;</td><td>&quot;C&quot;</td><td>&quot;T&quot;</td><td>100.0</td><td>&quot;PASS&quot;</td><td>&quot;AA=C;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>2656127</td><td>&quot;&quot;</td><td>&quot;G&quot;</td><td>&quot;C&quot;</td><td>100.0</td><td>&quot;PASS&quot;</td><td>&quot;AA=G;AC=14;AF=…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quo
t;</td><td>2656276</td><td>&quot;&quot;</td><td>&quot;G&quot;</td><td>&quot;A&quot;</td><td>100.0</td><td>&quot;PASS&quot;</td><td>&quot;AA=G;AC=2;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>2656677</td><td>&quot;&quot;</td><td>&quot;A&quot;</td><td>&quot;G&quot;</td><td>100.0</td><td>&quot;PASS&quot;</td><td>&quot;AA=A;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>2657176</td><td>&quot;rs2534636&quot;</td><td>&quot;C&quot;</td><td>&quot;T&quot;</td><td>100.0</td><td>&quot;PASS&quot;</td><td>&quot;AA=T;AC=89;AF=…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>2657205</td><td>&quot;&quot;</td><td>&quot;C&quot;</td><td>&quot;G&quot;</td><td>100.0</td><td>&quot;PASS&quot;</td><td>&quot;AA=C;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td></tr><tr><td>&quot;Y&quot;</td><td>28769764</td><td>&quot;&quot;</td><td>&quot;C&quot;</td><td>&quot;T&quot;</td><td>100.0</td><td>&quot;PASS&quot;</td><td>&quot;AA=C;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>28769939</td><td>&quot;&quot;</td><td>&quot;T&quot;</td><td>&quot;C&quot;</td><td>100.0</td><td>&quot;PASS&quot;</td><td>&quot;AA=T;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>28770057</td><td>&quot;&quot;</td><td>&quot;G&quot;</td><td>&quot;A&quot;</td><td>100.0</td><td>&quot;PASS&quot;</td><td>&quot;AA=G;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>28770125</td><td>&quot;&quot;</td><td>&quot;T&quot;</td><td>&quot;C&quot;</td><td>100.0</td><td>&quot;PASS&quot;</td><td>&quot;AA=T;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>28770156</td><td>&quot;&quot;</td><td>&quot;A&quot;</td><td>&quot;C&quot;</td><td>100.0</td><td>&quot;PASS&quot;</td><td>&quot;AA=A;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td
>&quot;Y&quot;</td><td>28770565</td><td>&quot;&quot;</td><td>&quot;C&quot;</td><td>&quot;T&quot;</td><td>100.0</td><td>&quot;PASS&quot;</td><td>&quot;AA=C;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>28770633</td><td>&quot;&quot;</td><td>&quot;T&quot;</td><td>&quot;TA&quot;</td><td>100.0</td><td>&quot;PASS&quot;</td><td>&quot;AA=T;AC=4;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>28770651</td><td>&quot;&quot;</td><td>&quot;G&quot;</td><td>&quot;A&quot;</td><td>100.0</td><td>&quot;PASS&quot;</td><td>&quot;AA=G;AC=14;AF=…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>28770656</td><td>&quot;&quot;</td><td>&quot;A&quot;</td><td>&quot;G&quot;</td><td>100.0</td><td>&quot;PASS&quot;</td><td>&quot;AA=A;AC=16;AF=…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>28770756</td><td>&quot;&quot;</td><td>&quot;C&quot;</td><td>&quot;G&quot;</td><td>100.0</td><td>&quot;PASS&quot;</td><td>&quot;AA=C;AC=9;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>28770875</td><td>&quot;&quot;</td><td>&quot;C&quot;</td><td>&quot;G&quot;</td><td>100.0</td><td>&quot;PASS&quot;</td><td>&quot;AA=C;AC=3;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>28770931</td><td>&quot;&quot;</td><td>&quot;T&quot;</td><td>&quot;C&quot;</td><td>100.0</td><td>&quot;PASS&quot;</td><td>&quot;AA=T;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (62_042, 9)\n",
"┌───────┬──────────┬────────────┬─────┬───┬───────┬────────┬──────────────────────────────┬────────┐\n",
"│ chrom ┆ pos ┆ id ┆ ref ┆ … ┆ qual ┆ filter ┆ info ┆ format │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ cat ┆ i32 ┆ str ┆ str ┆ ┆ f32 ┆ str ┆ str ┆ str │\n",
"╞═══════╪══════════╪════════════╪═════╪═══╪═══════╪════════╪══════════════════════════════╪════════╡\n",
"│ Y ┆ 2655180 ┆ rs11575897 ┆ G ┆ … ┆ 100.0 ┆ PASS ┆ AA=G;AC=22;AF=0.0178427;AN=1 ┆ GT │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 233;… ┆ │\n",
"│ Y ┆ 2655471 ┆ ┆ A ┆ … ┆ 100.0 ┆ PASS ┆ AA=A;AC=5;AF=0.00405515;AN=1 ┆ GT │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 233;… ┆ │\n",
"│ Y ┆ 2655754 ┆ ┆ A ┆ … ┆ 100.0 ┆ PASS ┆ AA=A;AC=1;AF=0.00081103;AN=1 ┆ GT │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 233;… ┆ │\n",
"│ Y ┆ 2655800 ┆ ┆ A ┆ … ┆ 100.0 ┆ PASS ┆ AA=A;AC=1;AF=0.00081103;AN=1 ┆ GT │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 233;… ┆ │\n",
"│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
"│ Y ┆ 28770656 ┆ ┆ A ┆ … ┆ 100.0 ┆ PASS ┆ AA=A;AC=16;AF=0.0139616;AN=1 ┆ GT │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 146;… ┆ │\n",
"│ Y ┆ 28770756 ┆ ┆ C ┆ … ┆ 100.0 ┆ PASS ┆ AA=C;AC=9;AF=0.00729927;AN=1 ┆ GT │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 233;… ┆ │\n",
"│ Y ┆ 28770875 ┆ ┆ C ┆ … ┆ 100.0 ┆ PASS ┆ AA=C;AC=3;AF=0.00243309;AN=1 ┆ GT │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 233;… ┆ │\n",
"│ Y ┆ 28770931 ┆ ┆ T ┆ … ┆ 100.0 ┆ PASS ┆ AA=T;AC=1;AF=0.00081103;AN=1 ┆ GT │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 233;… ┆ │\n",
"└───────┴──────────┴────────────┴─────┴───┴───────┴────────┴──────────────────────────────┴────────┘"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"f = open(\"../fixtures/ALL.chrY.phase3_integrated_v1a.20130502.genotypes.vcf.gz\", \"rb\")\n",
"g = open(\"../fixtures/ALL.chrY.phase3_integrated_v1a.20130502.genotypes.vcf.gz.tbi\", \"rb\")\n",
"ipc = ox.read_vcf(f, index=g)\n",
"pl.read_ipc(ipc)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><style>\n",
".dataframe > thead > tr > th,\n",
".dataframe > tbody > tr > td {\n",
" text-align: right;\n",
"}\n",
"</style>\n",
"<small>shape: (62_042, 9)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>chrom</th><th>pos</th><th>id</th><th>ref</th><th>alt</th><th>qual</th><th>filter</th><th>info</th><th>format</th></tr><tr><td>cat</td><td>i32</td><td>str</td><td>str</td><td>str</td><td>f32</td><td>str</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>&quot;Y&quot;</td><td>2655180</td><td>&quot;rs11575897&quot;</td><td>&quot;G&quot;</td><td>&quot;A&quot;</td><td>34439.5</td><td>&quot;PASS&quot;</td><td>&quot;AA=G;AC=22;AF=…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>2655471</td><td>&quot;&quot;</td><td>&quot;A&quot;</td><td>&quot;C&quot;</td><td>7014.370117</td><td>&quot;PASS&quot;</td><td>&quot;AA=A;AC=5;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>2655754</td><td>&quot;&quot;</td><td>&quot;A&quot;</td><td>&quot;T&quot;</td><td>238.684006</td><td>&quot;PASS&quot;</td><td>&quot;AA=A;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>2655800</td><td>&quot;&quot;</td><td>&quot;A&quot;</td><td>&quot;G&quot;</td><td>6.08381</td><td>&quot;PASS&quot;</td><td>&quot;AA=A;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>2655989</td><td>&quot;&quot;</td><td>&quot;A&quot;</td><td>&quot;G&quot;</td><td>71.4925</td><td>&quot;PASS&quot;</td><td>&quot;AA=A;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>2655994</td><td>&quot;&quot;</td><td>&quot;C&quot;</td><td>&quot;G&quot;</td><td>4.55831</td><td>&quot;PASS&quot;</td><td>&quot;AA=C;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>2656126</td><td>&quot;&quot;</td><td>&quot;C&quot;</td><td>&quot;T&quot;</td><td>42.006199</td><td>&quot;PASS&quot;</td><td>&quot;AA=C;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>2656127</td><td>&quot;&quot;</td><td>&quot;G&quot;</td><td>&quot;C&quot;</td><td>539.66803</td><td>&quot;PASS&quot;</td><td>&quot;AA=G;AC=14;AF=…</td><td>&quot;GT&quot;</
td></tr><tr><td>&quot;Y&quot;</td><td>2656276</td><td>&quot;&quot;</td><td>&quot;G&quot;</td><td>&quot;A&quot;</td><td>153.227005</td><td>&quot;PASS&quot;</td><td>&quot;AA=G;AC=2;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>2656677</td><td>&quot;&quot;</td><td>&quot;A&quot;</td><td>&quot;G&quot;</td><td>82.752701</td><td>&quot;PASS&quot;</td><td>&quot;AA=A;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>2657176</td><td>&quot;rs2534636&quot;</td><td>&quot;C&quot;</td><td>&quot;T&quot;</td><td>11169.099609</td><td>&quot;PASS&quot;</td><td>&quot;AA=T;AC=89;AF=…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>2657205</td><td>&quot;&quot;</td><td>&quot;C&quot;</td><td>&quot;G&quot;</td><td>206.488007</td><td>&quot;PASS&quot;</td><td>&quot;AA=C;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td></tr><tr><td>&quot;Y&quot;</td><td>28769764</td><td>&quot;&quot;</td><td>&quot;C&quot;</td><td>&quot;T&quot;</td><td>1224.76001</td><td>&quot;PASS&quot;</td><td>&quot;AA=C;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>28769939</td><td>&quot;&quot;</td><td>&quot;T&quot;</td><td>&quot;C&quot;</td><td>65.424599</td><td>&quot;PASS&quot;</td><td>&quot;AA=T;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>28770057</td><td>&quot;&quot;</td><td>&quot;G&quot;</td><td>&quot;A&quot;</td><td>22.034401</td><td>&quot;PASS&quot;</td><td>&quot;AA=G;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>28770125</td><td>&quot;&quot;</td><td>&quot;T&quot;</td><td>&quot;C&quot;</td><td>51.250301</td><td>&quot;PASS&quot;</td><td>&quot;AA=T;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>28770156</td><td>&quot;&quot;</td><td>&quot;A&quot;</td><td>&quot;C&quot;</td><td>9.07688</td><td>&quot;PASS&quot;</t
d><td>&quot;AA=A;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>28770565</td><td>&quot;&quot;</td><td>&quot;C&quot;</td><td>&quot;T&quot;</td><td>27.900499</td><td>&quot;PASS&quot;</td><td>&quot;AA=C;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>28770633</td><td>&quot;&quot;</td><td>&quot;T&quot;</td><td>&quot;TA&quot;</td><td>388.31601</td><td>&quot;PASS&quot;</td><td>&quot;AA=T;AC=4;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>28770651</td><td>&quot;&quot;</td><td>&quot;G&quot;</td><td>&quot;A&quot;</td><td>152.626999</td><td>&quot;PASS&quot;</td><td>&quot;AA=G;AC=14;AF=…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>28770656</td><td>&quot;&quot;</td><td>&quot;A&quot;</td><td>&quot;G&quot;</td><td>239.639999</td><td>&quot;PASS&quot;</td><td>&quot;AA=A;AC=16;AF=…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>28770756</td><td>&quot;&quot;</td><td>&quot;C&quot;</td><td>&quot;G&quot;</td><td>718.411011</td><td>&quot;PASS&quot;</td><td>&quot;AA=C;AC=9;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>28770875</td><td>&quot;&quot;</td><td>&quot;C&quot;</td><td>&quot;G&quot;</td><td>261.834015</td><td>&quot;PASS&quot;</td><td>&quot;AA=C;AC=3;AF=0…</td><td>&quot;GT&quot;</td></tr><tr><td>&quot;Y&quot;</td><td>28770931</td><td>&quot;&quot;</td><td>&quot;T&quot;</td><td>&quot;C&quot;</td><td>59.303299</td><td>&quot;PASS&quot;</td><td>&quot;AA=T;AC=1;AF=0…</td><td>&quot;GT&quot;</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (62_042, 9)\n",
"┌───────┬──────────┬────────────┬─────┬───┬─────────────┬────────┬────────────────────────┬────────┐\n",
"│ chrom ┆ pos ┆ id ┆ ref ┆ … ┆ qual ┆ filter ┆ info ┆ format │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ cat ┆ i32 ┆ str ┆ str ┆ ┆ f32 ┆ str ┆ str ┆ str │\n",
"╞═══════╪══════════╪════════════╪═════╪═══╪═════════════╪════════╪════════════════════════╪════════╡\n",
"│ Y ┆ 2655180 ┆ rs11575897 ┆ G ┆ … ┆ 34439.5 ┆ PASS ┆ AA=G;AC=22;AF=0.017842 ┆ GT │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 7;AN=1233;… ┆ │\n",
"│ Y ┆ 2655471 ┆ ┆ A ┆ … ┆ 7014.370117 ┆ PASS ┆ AA=A;AC=5;AF=0.0040551 ┆ GT │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 5;AN=1233;… ┆ │\n",
"│ Y ┆ 2655754 ┆ ┆ A ┆ … ┆ 238.684006 ┆ PASS ┆ AA=A;AC=1;AF=0.0008110 ┆ GT │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 3;AN=1233;… ┆ │\n",
"│ Y ┆ 2655800 ┆ ┆ A ┆ … ┆ 6.08381 ┆ PASS ┆ AA=A;AC=1;AF=0.0008110 ┆ GT │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 3;AN=1233;… ┆ │\n",
"│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
"│ Y ┆ 28770656 ┆ ┆ A ┆ … ┆ 239.639999 ┆ PASS ┆ AA=A;AC=16;AF=0.013961 ┆ GT │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 6;AN=1146;… ┆ │\n",
"│ Y ┆ 28770756 ┆ ┆ C ┆ … ┆ 718.411011 ┆ PASS ┆ AA=C;AC=9;AF=0.0072992 ┆ GT │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 7;AN=1233;… ┆ │\n",
"│ Y ┆ 28770875 ┆ ┆ C ┆ … ┆ 261.834015 ┆ PASS ┆ AA=C;AC=3;AF=0.0024330 ┆ GT │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 9;AN=1233;… ┆ │\n",
"│ Y ┆ 28770931 ┆ ┆ T ┆ … ┆ 59.303299 ┆ PASS ┆ AA=T;AC=1;AF=0.0008110 ┆ GT │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 3;AN=1233;… ┆ │\n",
"└───────┴──────────┴────────────┴─────┴───┴─────────────┴────────┴────────────────────────┴────────┘"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"f = open(\"../fixtures/ALL.chrY.phase3_shapeit2_mvncall_integrated.20130502.genotypes.bcf\", \"rb\")\n",
"g = open(\"../fixtures/ALL.chrY.phase3_shapeit2_mvncall_integrated.20130502.genotypes.bcf.csi\", \"rb\")\n",
"ipc = ox.read_bcf(f, index=g)\n",
"pl.read_ipc(ipc)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><style>\n",
".dataframe > thead > tr > th,\n",
".dataframe > tbody > tr > td {\n",
" text-align: right;\n",
"}\n",
"</style>\n",
"<small>shape: (6, 12)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>qname</th><th>flag</th><th>rname</th><th>pos</th><th>mapq</th><th>cigar</th><th>rnext</th><th>pnext</th><th>tlen</th><th>seq</th><th>qual</th><th>end</th></tr><tr><td>str</td><td>u16</td><td>cat</td><td>i32</td><td>u8</td><td>str</td><td>cat</td><td>i32</td><td>i32</td><td>str</td><td>str</td><td>i32</td></tr></thead><tbody><tr><td>&quot;SOLEXA-1GA-2_2…</td><td>0</td><td>&quot;chr1&quot;</td><td>10145</td><td>25</td><td>&quot;36M&quot;</td><td>null</td><td>null</td><td>0</td><td>&quot;AACCCCTAACCCTA…</td><td>&quot;hhhhHcWhhHTghc…</td><td>10180</td></tr><tr><td>&quot;SOLEXA-1GA-2_2…</td><td>0</td><td>&quot;chr1&quot;</td><td>10148</td><td>25</td><td>&quot;36M&quot;</td><td>null</td><td>null</td><td>0</td><td>&quot;CCCTAACCCTAACC…</td><td>&quot;hbfhhhXUYhT_UL…</td><td>10183</td></tr><tr><td>&quot;SOLEXA-1GA-2_2…</td><td>0</td><td>&quot;chr1&quot;</td><td>999994</td><td>25</td><td>&quot;36M&quot;</td><td>null</td><td>null</td><td>0</td><td>&quot;GGGCCGTGGGCACA…</td><td>&quot;hhhYcaJdhSAMXN…</td><td>1000029</td></tr><tr><td>&quot;SOLEXA-1GA-2_2…</td><td>0</td><td>&quot;chr1&quot;</td><td>1002179</td><td>25</td><td>&quot;36M&quot;</td><td>null</td><td>null</td><td>0</td><td>&quot;CTGCGGGCAAAGAG…</td><td>&quot;hhhhhhcOLTIXJL…</td><td>1002214</td></tr><tr><td>&quot;SOLEXA-1GA-2_2…</td><td>16</td><td>&quot;chr3&quot;</td><td>79027</td><td>25</td><td>&quot;36M&quot;</td><td>null</td><td>null</td><td>0</td><td>&quot;CTCTGCGCCTGGCT…</td><td>&quot;?IMR@RTVP\\N^hh…</td><td>79062</td></tr><tr><td>&quot;SOLEXA-1GA-2_2…</td><td>16</td><td>&quot;chr3&quot;</td><td>86026</td><td>25</td><td>&quot;36M&quot;</td><td>null</td><td>null</td><td>0</td><td>&quot;TGTTAGTGTGTGAT…</td><td>&quot;IKLD&gt;GBKOKJNBB…</td><td>86061</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (6, 12)\n",
"┌─────────────────┬──────┬───────┬─────────┬───┬──────┬─────────────────┬────────────────┬─────────┐\n",
"│ qname ┆ flag ┆ rname ┆ pos ┆ … ┆ tlen ┆ seq ┆ qual ┆ end │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ str ┆ u16 ┆ cat ┆ i32 ┆ ┆ i32 ┆ str ┆ str ┆ i32 │\n",
"╞═════════════════╪══════╪═══════╪═════════╪═══╪══════╪═════════════════╪════════════════╪═════════╡\n",
"│ SOLEXA-1GA-2_2_ ┆ 0 ┆ chr1 ┆ 10145 ┆ … ┆ 0 ┆ AACCCCTAACCCTAA ┆ hhhhHcWhhHTghc ┆ 10180 │\n",
"│ FC20EMB:5:251:9 ┆ ┆ ┆ ┆ ┆ ┆ CCCTAACCCTAACCC ┆ KA_ONhAAEEBZE? ┆ │\n",
"│ 79… ┆ ┆ ┆ ┆ ┆ ┆ TA… ┆ H?CB… ┆ │\n",
"│ SOLEXA-1GA-2_2_ ┆ 0 ┆ chr1 ┆ 10148 ┆ … ┆ 0 ┆ CCCTAACCCTAACCC ┆ hbfhhhXUYhT_UL ┆ 10183 │\n",
"│ FC20EMB:5:102:2 ┆ ┆ ┆ ┆ ┆ ┆ TAACCCTAACCCTAA ┆ ZdLRTKNIMIKGLJ ┆ │\n",
"│ 14… ┆ ┆ ┆ ┆ ┆ ┆ CC… ┆ CHFF… ┆ │\n",
"│ SOLEXA-1GA-2_2_ ┆ 0 ┆ chr1 ┆ 999994 ┆ … ┆ 0 ┆ GGGCCGTGGGCACAG ┆ hhhYcaJdhSAMXN ┆ 1000029 │\n",
"│ FC20EMB:5:116:7 ┆ ┆ ┆ ┆ ┆ ┆ CCTCACCCAGGAAAG ┆ SMAPHBI?ECIBDC ┆ │\n",
"│ 91… ┆ ┆ ┆ ┆ ┆ ┆ CA… ┆ =CEA… ┆ │\n",
"│ SOLEXA-1GA-2_2_ ┆ 0 ┆ chr1 ┆ 1002179 ┆ … ┆ 0 ┆ CTGCGGGCAAAGAGG ┆ hhhhhhcOLTIXJL ┆ 1002214 │\n",
"│ FC20EMB:5:20:52 ┆ ┆ ┆ ┆ ┆ ┆ CAGGGGGAGGCCCCC ┆ QJDQPJ>L=IBDCB ┆ │\n",
"│ 3:… ┆ ┆ ┆ ┆ ┆ ┆ GA… ┆ >?A?… ┆ │\n",
"│ SOLEXA-1GA-2_2_ ┆ 16 ┆ chr3 ┆ 79027 ┆ … ┆ 0 ┆ CTCTGCGCCTGGCTA ┆ ?IMR@RTVP\\N^hh ┆ 79062 │\n",
"│ FC20EMB:5:276:8 ┆ ┆ ┆ ┆ ┆ ┆ ATTTTTGTATTTTTA ┆ bh]hUhhhRhhhhh ┆ │\n",
"│ 95… ┆ ┆ ┆ ┆ ┆ ┆ GT… ┆ hhhh… ┆ │\n",
"│ SOLEXA-1GA-2_2_ ┆ 16 ┆ chr3 ┆ 86026 ┆ … ┆ 0 ┆ TGTTAGTGTGTGATT ┆ IKLD>GBKOKJNBB ┆ 86061 │\n",
"│ FC20EMB:5:229:3 ┆ ┆ ┆ ┆ ┆ ┆ GTGTGTGTGTGTGTG ┆ LORVZehhegV_Qh ┆ │\n",
"│ 13… ┆ ┆ ┆ ┆ ┆ ┆ TG… ┆ hhhh… ┆ │\n",
"└─────────────────┴──────┴───────┴─────────┴───┴──────┴─────────────────┴────────────────┴─────────┘"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"f = open(\"../fixtures/sample.bam\", \"rb\")\n",
"g = open(\"../fixtures/sample.bam.bai\", \"rb\")\n",
"ipc = ox.read_bam(f, index=g)\n",
"pl.read_ipc(ipc)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><style>\n",
".dataframe > thead > tr > th,\n",
".dataframe > tbody > tr > td {\n",
" text-align: right;\n",
"}\n",
"</style>\n",
"<small>shape: (160_178, 12)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>qname</th><th>flag</th><th>rname</th><th>pos</th><th>mapq</th><th>cigar</th><th>rnext</th><th>pnext</th><th>tlen</th><th>seq</th><th>qual</th><th>end</th></tr><tr><td>str</td><td>u16</td><td>cat</td><td>i32</td><td>u8</td><td>str</td><td>cat</td><td>i32</td><td>i32</td><td>str</td><td>str</td><td>i32</td></tr></thead><tbody><tr><td>&quot;SRR4435251::::…</td><td>83</td><td>&quot;chr1&quot;</td><td>82736</td><td>0</td><td>&quot;100M&quot;</td><td>&quot;chr1&quot;</td><td>82517</td><td>-319</td><td>&quot;TAAAAAAGAATGCA…</td><td>&quot;??????????????…</td><td>82835</td></tr><tr><td>&quot;SRR4435251::::…</td><td>147</td><td>&quot;chr1&quot;</td><td>82742</td><td>0</td><td>&quot;100M&quot;</td><td>&quot;chr1&quot;</td><td>82511</td><td>-331</td><td>&quot;AGAATGCAGATATT…</td><td>&quot;??????????????…</td><td>82841</td></tr><tr><td>&quot;SRR4435251::::…</td><td>163</td><td>&quot;chr1&quot;</td><td>82744</td><td>0</td><td>&quot;100M&quot;</td><td>&quot;chr1&quot;</td><td>82982</td><td>338</td><td>&quot;AATGCAGATATTAC…</td><td>&quot;??????????????…</td><td>82843</td></tr><tr><td>&quot;SRR4435251::::…</td><td>83</td><td>&quot;chr1&quot;</td><td>82748</td><td>0</td><td>&quot;100M&quot;</td><td>&quot;chr1&quot;</td><td>82488</td><td>-360</td><td>&quot;CAGATATTACAAAA…</td><td>&quot;??????????????…</td><td>82847</td></tr><tr><td>&quot;SRR4435251::::…</td><td>147</td><td>&quot;chr1&quot;</td><td>82749</td><td>0</td><td>&quot;100M&quot;</td><td>&quot;chr1&quot;</td><td>82506</td><td>-343</td><td>&quot;AGATATTACAAAAC…</td><td>&quot;??????????????…</td><td>82848</td></tr><tr><td>&quot;SRR4435251::::…</td><td>147</td><td>&quot;chr1&quot;</td><td>82757</td><td>0</td><td>&quot;100M&quot;</td><td>&quot;chr1&quot;</td><td>82506</td><td>-351</td><td>&quot;CAAAACCAGTTTAC…</td><td>&quot;??????????????…</td><td>82856</td></tr><tr><td>&quot;SRR4435251::::…</td><td>83</td><td>&quot;chr1&quot;</td><td>82759<
/td><td>0</td><td>&quot;100M&quot;</td><td>&quot;chr1&quot;</td><td>82487</td><td>-372</td><td>&quot;AAACCAGTTTACAA…</td><td>&quot;?????????5????…</td><td>82858</td></tr><tr><td>&quot;SRR4435251::::…</td><td>147</td><td>&quot;chr1&quot;</td><td>82771</td><td>0</td><td>&quot;100M&quot;</td><td>&quot;chr1&quot;</td><td>82342</td><td>-529</td><td>&quot;AAAAGTTACTAAAC…</td><td>&quot;????????5?5???…</td><td>82870</td></tr><tr><td>&quot;SRR4435251::::…</td><td>83</td><td>&quot;chr1&quot;</td><td>82781</td><td>0</td><td>&quot;100M&quot;</td><td>&quot;chr1&quot;</td><td>82360</td><td>-521</td><td>&quot;AAACAAATAAAAAC…</td><td>&quot;??????????????…</td><td>82880</td></tr><tr><td>&quot;SRR4435251::::…</td><td>177</td><td>&quot;chr1&quot;</td><td>82781</td><td>0</td><td>&quot;100M&quot;</td><td>&quot;chr1&quot;</td><td>82816</td><td>0</td><td>&quot;AAACAAATAAAAAC…</td><td>&quot;?????5++?5&#x27;5+?…</td><td>82880</td></tr><tr><td>&quot;SRR4435251::::…</td><td>83</td><td>&quot;chr1&quot;</td><td>82790</td><td>0</td><td>&quot;100M&quot;</td><td>&quot;chr1&quot;</td><td>82492</td><td>-398</td><td>&quot;AAAACTACATCCCA…</td><td>&quot;??????????????…</td><td>82889</td></tr><tr><td>&quot;SRR4435251::::…</td><td>83</td><td>&quot;chr1&quot;</td><td>82790</td><td>0</td><td>&quot;100M&quot;</td><td>&quot;chr1&quot;</td><td>82489</td><td>-401</td><td>&quot;AAAACTACATCCCA…</td><td>&quot;??????????????…</td><td>82889</td></tr><tr><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td></tr><tr><td>&quot;SRR4435251::::…</td><td>163</td><td>&quot;chr1&quot;</td><td>528781</td><td>0</td><td>&quot;100M&quot;</td><td>&quot;chr1&quot;</td><td>529030</td><td>349</td><td>&quot;TGATCAGTCCTTGT…</td><td>&quot;??????????????…</td><td>528880</td></tr><tr><td>&quot;SRR4435251::::…</td><td>163</td><td>&quot;chr1&quot;</td><td>528781</td><td>0</td><td>&quot;100M
&quot;</td><td>&quot;chr1&quot;</td><td>529022</td><td>341</td><td>&quot;TGATCAGTCCTTGT…</td><td>&quot;??????????????…</td><td>528880</td></tr><tr><td>&quot;SRR4435251::::…</td><td>99</td><td>&quot;chr1&quot;</td><td>528786</td><td>0</td><td>&quot;100M&quot;</td><td>&quot;chr1&quot;</td><td>528833</td><td>147</td><td>&quot;AGTCCTTGTCTGGT…</td><td>&quot;??????????????…</td><td>528885</td></tr><tr><td>&quot;SRR4435251::::…</td><td>99</td><td>&quot;chr1&quot;</td><td>528786</td><td>0</td><td>&quot;100M&quot;</td><td>&quot;chr1&quot;</td><td>529077</td><td>391</td><td>&quot;AGTCCTTGTCTGGT…</td><td>&quot;??????????????…</td><td>528885</td></tr><tr><td>&quot;SRR4435251::::…</td><td>83</td><td>&quot;chr1&quot;</td><td>528788</td><td>0</td><td>&quot;100M&quot;</td><td>&quot;chr1&quot;</td><td>528591</td><td>-297</td><td>&quot;TCCTTGTCTGGTCT…</td><td>&quot;??????????????…</td><td>528887</td></tr><tr><td>&quot;SRR4435251::::…</td><td>99</td><td>&quot;chr1&quot;</td><td>528790</td><td>0</td><td>&quot;100M&quot;</td><td>&quot;chr1&quot;</td><td>529022</td><td>332</td><td>&quot;CTTGTCTGGTCTGG…</td><td>&quot;???????????5??…</td><td>528889</td></tr><tr><td>&quot;SRR4435251::::…</td><td>99</td><td>&quot;chr1&quot;</td><td>528790</td><td>0</td><td>&quot;100M&quot;</td><td>&quot;chr1&quot;</td><td>529006</td><td>316</td><td>&quot;CTTGTCTGGTCTGG…</td><td>&quot;??????????????…</td><td>528889</td></tr><tr><td>&quot;SRR4435251::::…</td><td>147</td><td>&quot;chr1&quot;</td><td>528790</td><td>0</td><td>&quot;100M&quot;</td><td>&quot;chr1&quot;</td><td>528599</td><td>-291</td><td>&quot;CTTGTCTGGTCTGG…</td><td>&quot;??????????????…</td><td>528889</td></tr><tr><td>&quot;SRR4435251::::…</td><td>83</td><td>&quot;chr1&quot;</td><td>528792</td><td>0</td><td>&quot;100M&quot;</td><td>&quot;chr1&quot;</td><td>528483</td><td>-409</td><td>&quot;TGTCTGGTCTGGCT…</td><td>&quot;??????????????…</td><td>528891</td></tr><tr><td>&quot;SRR4435251::::…</td><td>83</td><td>&quot;chr1&quot;</td><td>528800</td><td>
0</td><td>&quot;100M&quot;</td><td>&quot;chr1&quot;</td><td>528695</td><td>-205</td><td>&quot;CTGGCTCTGCCCCA…</td><td>&quot;??5????????5?5…</td><td>528899</td></tr><tr><td>&quot;SRR4435251::::…</td><td>147</td><td>&quot;chr1&quot;</td><td>528803</td><td>0</td><td>&quot;100M&quot;</td><td>&quot;chr1&quot;</td><td>528567</td><td>-336</td><td>&quot;GCTCTGCCCCACTC…</td><td>&quot;????????5?5???…</td><td>528902</td></tr><tr><td>&quot;SRR4435251::::…</td><td>99</td><td>&quot;chr1&quot;</td><td>528805</td><td>0</td><td>&quot;100M&quot;</td><td>&quot;chr1&quot;</td><td>528978</td><td>268</td><td>&quot;TCTGCCCCACTCTC…</td><td>&quot;???????+??????…</td><td>528904</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (160_178, 12)\n",
"┌──────────────────┬──────┬───────┬────────┬───┬──────┬─────────────────┬─────────────────┬────────┐\n",
"│ qname ┆ flag ┆ rname ┆ pos ┆ … ┆ tlen ┆ seq ┆ qual ┆ end │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ str ┆ u16 ┆ cat ┆ i32 ┆ ┆ i32 ┆ str ┆ str ┆ i32 │\n",
"╞══════════════════╪══════╪═══════╪════════╪═══╪══════╪═════════════════╪═════════════════╪════════╡\n",
"│ SRR4435251::::31 ┆ 83 ┆ chr1 ┆ 82736 ┆ … ┆ -319 ┆ TAAAAAAGAATGCAG ┆ ??????????????? ┆ 82835 │\n",
"│ 3654063 ┆ ┆ ┆ ┆ ┆ ┆ ATATTACAAAACCAG ┆ ??????????????? ┆ │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ TT… ┆ ??… ┆ │\n",
"│ SRR4435251::::13 ┆ 147 ┆ chr1 ┆ 82742 ┆ … ┆ -331 ┆ AGAATGCAGATATTA ┆ ??????????????? ┆ 82841 │\n",
"│ 56039 ┆ ┆ ┆ ┆ ┆ ┆ CAAAACCAGTTTACA ┆ ??????????????? ┆ │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ AA… ┆ ??… ┆ │\n",
"│ SRR4435251::::27 ┆ 163 ┆ chr1 ┆ 82744 ┆ … ┆ 338 ┆ AATGCAGATATTACA ┆ ??????????????? ┆ 82843 │\n",
"│ 0890793 ┆ ┆ ┆ ┆ ┆ ┆ AAACCAGTTTACAAA ┆ ??????????????? ┆ │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ AG… ┆ ??… ┆ │\n",
"│ SRR4435251::::13 ┆ 83 ┆ chr1 ┆ 82748 ┆ … ┆ -360 ┆ CAGATATTACAAAAC ┆ ??????????????? ┆ 82847 │\n",
"│ 56040 ┆ ┆ ┆ ┆ ┆ ┆ CAGTTTACAAAAGTT ┆ ??????????????? ┆ │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ AC… ┆ ??… ┆ │\n",
"│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
"│ SRR4435251::::45 ┆ 83 ┆ chr1 ┆ 528792 ┆ … ┆ -409 ┆ TGTCTGGTCTGGCTC ┆ ??????????????? ┆ 528891 │\n",
"│ 5831805 ┆ ┆ ┆ ┆ ┆ ┆ TGCCCCACTCTCCTT ┆ ??????????????? ┆ │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ CT… ┆ ??… ┆ │\n",
"│ SRR4435251::::13 ┆ 83 ┆ chr1 ┆ 528800 ┆ … ┆ -205 ┆ CTGGCTCTGCCCCAC ┆ ??5????????5?5? ┆ 528899 │\n",
"│ 78464 ┆ ┆ ┆ ┆ ┆ ┆ TCTCCTTCTCTCCTA ┆ ??5'55+???+55?? ┆ │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ GT… ┆ ??… ┆ │\n",
"│ SRR4435251::::13 ┆ 147 ┆ chr1 ┆ 528803 ┆ … ┆ -336 ┆ GCTCTGCCCCACTCT ┆ ????????5?5???? ┆ 528902 │\n",
"│ 78465 ┆ ┆ ┆ ┆ ┆ ┆ CCTTCTCACCTAGTT ┆ ??????????????? ┆ │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ GG… ┆ ??… ┆ │\n",
"│ SRR4435251::::27 ┆ 99 ┆ chr1 ┆ 528805 ┆ … ┆ 268 ┆ TCTGCCCCACTCTCC ┆ ???????+??????? ┆ 528904 │\n",
"│ 0862085 ┆ ┆ ┆ ┆ ┆ ┆ TTTTCACCTAGTTGG ┆ ??????????????? ┆ │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ AA… ┆ 5?… ┆ │\n",
"└──────────────────┴──────┴───────┴────────┴───┴──────┴─────────────────┴─────────────────┴────────┘"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"f = smart_open.open(\"https://oxbow-ngs.s3.us-east-2.amazonaws.com/example.bam\", \"rb\")\n",
"g = smart_open.open(\"https://oxbow-ngs.s3.us-east-2.amazonaws.com/example.bam.bai\", \"rb\")\n",
"ipc = ox.read_bam(f, index=g)\n",
"pl.read_ipc(ipc)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## File-like on BBI"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"f = smart_open.open(\n",
" \"https://oxbow-ngs.s3.us-east-2.amazonaws.com/valid.bigWig\", \"rb\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><style>\n",
".dataframe > thead > tr > th,\n",
".dataframe > tbody > tr > td {\n",
" text-align: right;\n",
"}\n",
"</style>\n",
"<small>shape: (100_000, 4)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>chrom</th><th>start</th><th>end</th><th>value</th></tr><tr><td>cat</td><td>u32</td><td>u32</td><td>f32</td></tr></thead><tbody><tr><td>&quot;chr17&quot;</td><td>59898</td><td>59900</td><td>0.06792</td></tr><tr><td>&quot;chr17&quot;</td><td>59900</td><td>59947</td><td>0.16627</td></tr><tr><td>&quot;chr17&quot;</td><td>59947</td><td>59999</td><td>0.85137</td></tr><tr><td>&quot;chr17&quot;</td><td>59999</td><td>60044</td><td>0.86883</td></tr><tr><td>&quot;chr17&quot;</td><td>60044</td><td>60046</td><td>0.80091</td></tr><tr><td>&quot;chr17&quot;</td><td>60046</td><td>60072</td><td>0.70256</td></tr><tr><td>&quot;chr17&quot;</td><td>60072</td><td>60145</td><td>0.01746</td></tr><tr><td>&quot;chr17&quot;</td><td>60484</td><td>60485</td><td>0.19197</td></tr><tr><td>&quot;chr17&quot;</td><td>60485</td><td>60486</td><td>0.30831</td></tr><tr><td>&quot;chr17&quot;</td><td>60486</td><td>60493</td><td>0.40066</td></tr><tr><td>&quot;chr17&quot;</td><td>60493</td><td>60496</td><td>0.40898</td></tr><tr><td>&quot;chr17&quot;</td><td>60496</td><td>60497</td><td>0.4499</td></tr><tr><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td></tr><tr><td>&quot;chr17&quot;</td><td>216049</td><td>216050</td><td>283.279999</td></tr><tr><td>&quot;chr17&quot;</td><td>216050</td><td>216051</td><td>277.049011</td></tr><tr><td>&quot;chr17&quot;</td><td>216051</td><td>216052</td><td>272.397003</td></tr><tr><td>&quot;chr17&quot;</td><td>216052</td><td>216053</td><td>265.96701</td></tr><tr><td>&quot;chr17&quot;</td><td>216053</td><td>216054</td><td>257.497986</td></tr><tr><td>&quot;chr17&quot;</td><td>216054</td><td>216055</td><td>251.712997</td></tr><tr><td>&quot;chr17&quot;</td><td>216055</td><td>216056</td><td>245.505005</td></tr><tr><td>&quot;chr17&quot;</td><td>216056</td><td>216057</td><td>238.621994</td></tr><tr><td>&quot;chr17&quot;</td><td>216057</td><td>216058</td><td>233.033005</td></tr><tr>
<td>&quot;chr17&quot;</td><td>216058</td><td>216059</td><td>226.184006</td></tr><tr><td>&quot;chr17&quot;</td><td>216059</td><td>216060</td><td>221.373993</td></tr><tr><td>&quot;chr17&quot;</td><td>216060</td><td>216061</td><td>216.485001</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (100_000, 4)\n",
"┌───────┬────────┬────────┬────────────┐\n",
"│ chrom ┆ start ┆ end ┆ value │\n",
"│ --- ┆ --- ┆ --- ┆ --- │\n",
"│ cat ┆ u32 ┆ u32 ┆ f32 │\n",
"╞═══════╪════════╪════════╪════════════╡\n",
"│ chr17 ┆ 59898 ┆ 59900 ┆ 0.06792 │\n",
"│ chr17 ┆ 59900 ┆ 59947 ┆ 0.16627 │\n",
"│ chr17 ┆ 59947 ┆ 59999 ┆ 0.85137 │\n",
"│ chr17 ┆ 59999 ┆ 60044 ┆ 0.86883 │\n",
"│ … ┆ … ┆ … ┆ … │\n",
"│ chr17 ┆ 216057 ┆ 216058 ┆ 233.033005 │\n",
"│ chr17 ┆ 216058 ┆ 216059 ┆ 226.184006 │\n",
"│ chr17 ┆ 216059 ┆ 216060 ┆ 221.373993 │\n",
"│ chr17 ┆ 216060 ┆ 216061 ┆ 216.485001 │\n",
"└───────┴────────┴────────┴────────────┘"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ipc = ox.read_bigwig(f)\n",
"pl.read_ipc(ipc)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Open the remote bigBed file in binary read mode; the handle `g` is\n",
"# consumed by `ox.read_bigbed` in the next cell.\n",
"g = smart_open.open(\n",
"    \"https://oxbow-ngs.s3.us-east-2.amazonaws.com/small.bigBed\",\n",
"    mode=\"rb\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><style>\n",
".dataframe > thead > tr > th,\n",
".dataframe > tbody > tr > td {\n",
" text-align: right;\n",
"}\n",
"</style>\n",
"<small>shape: (27, 11)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>chrom</th><th>start</th><th>end</th><th>name</th><th>score</th><th>strand</th><th>thickStart</th><th>thickEnd</th><th>reserved</th><th>ccre</th><th>classification</th></tr><tr><td>cat</td><td>u32</td><td>u32</td><td>str</td><td>u32</td><td>str</td><td>u32</td><td>u32</td><td>u32</td><td>str</td><td>str</td></tr></thead><tbody><tr><td>&quot;chr17&quot;</td><td>118343</td><td>118595</td><td>&quot;EH38E1838787&quot;</td><td>0</td><td>&quot;.&quot;</td><td>118343</td><td>118595</td><td>225225225</td><td>&quot;Low-DNase&quot;</td><td>&quot;Missing-data/P…</td></tr><tr><td>&quot;chr17&quot;</td><td>142029</td><td>142378</td><td>&quot;EH38E1838788&quot;</td><td>0</td><td>&quot;.&quot;</td><td>142029</td><td>142378</td><td>225225225</td><td>&quot;Low-DNase&quot;</td><td>&quot;Missing-data/P…</td></tr><tr><td>&quot;chr17&quot;</td><td>143819</td><td>144169</td><td>&quot;EH38E1838789&quot;</td><td>0</td><td>&quot;.&quot;</td><td>143819</td><td>144169</td><td>6218147</td><td>&quot;DNase-only&quot;</td><td>&quot;Missing-data/P…</td></tr><tr><td>&quot;chr17&quot;</td><td>156467</td><td>156703</td><td>&quot;EH38E1838794&quot;</td><td>0</td><td>&quot;.&quot;</td><td>156467</td><td>156703</td><td>225225225</td><td>&quot;Low-DNase&quot;</td><td>&quot;Missing-data/P…</td></tr><tr><td>&quot;chr17&quot;</td><td>163674</td><td>163938</td><td>&quot;EH38E1838796&quot;</td><td>0</td><td>&quot;.&quot;</td><td>163674</td><td>163938</td><td>225225225</td><td>&quot;Low-DNase&quot;</td><td>&quot;Missing-data/P…</td></tr><tr><td>&quot;chr17&quot;</td><td>164413</td><td>164566</td><td>&quot;EH38E1838797&quot;</td><td>0</td><td>&quot;.&quot;</td><td>164413</td><td>164566</td><td>225225225</td><td>&quot;Low-DNase&quot;</td><td>&quot;Missing-data/P…</td></tr><tr><td>&quot;chr17&quot;</td><td>164585</td><td>164908</td><td>&quot;EH38E1838798&quot;</td><td>0</td><td>&quot;.&quot;</td><td>164585</td><td>164908</td><td>
225225225</td><td>&quot;Low-DNase&quot;</td><td>&quot;Missing-data/P…</td></tr><tr><td>&quot;chr17&quot;</td><td>177817</td><td>178148</td><td>&quot;EH38E1838801&quot;</td><td>0</td><td>&quot;.&quot;</td><td>177817</td><td>178148</td><td>225225225</td><td>&quot;Low-DNase&quot;</td><td>&quot;Missing-data/P…</td></tr><tr><td>&quot;chr17&quot;</td><td>178300</td><td>178458</td><td>&quot;EH38E1838802&quot;</td><td>0</td><td>&quot;.&quot;</td><td>178300</td><td>178458</td><td>225225225</td><td>&quot;Low-DNase&quot;</td><td>&quot;Missing-data/P…</td></tr><tr><td>&quot;chr17&quot;</td><td>178776</td><td>179115</td><td>&quot;EH38E1838803&quot;</td><td>0</td><td>&quot;.&quot;</td><td>178776</td><td>179115</td><td>225225225</td><td>&quot;Low-DNase&quot;</td><td>&quot;Missing-data/P…</td></tr><tr><td>&quot;chr17&quot;</td><td>179132</td><td>179349</td><td>&quot;EH38E1838804&quot;</td><td>0</td><td>&quot;.&quot;</td><td>179132</td><td>179349</td><td>225225225</td><td>&quot;Low-DNase&quot;</td><td>&quot;Missing-data/P…</td></tr><tr><td>&quot;chr17&quot;</td><td>179796</td><td>180018</td><td>&quot;EH38E1838805&quot;</td><td>0</td><td>&quot;.&quot;</td><td>179796</td><td>180018</td><td>225225225</td><td>&quot;Low-DNase&quot;</td><td>&quot;Missing-data/P…</td></tr><tr><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td></tr><tr><td>&quot;chr17&quot;</td><td>181270</td><td>181500</td><td>&quot;EH38E1838809&quot;</td><td>0</td><td>&quot;.&quot;</td><td>181270</td><td>181500</td><td>225225225</td><td>&quot;Low-DNase&quot;</td><td>&quot;Missing-data/P…</td></tr><tr><td>&quot;chr17&quot;</td><td>181586</td><td>181802</td><td>&quot;EH38E1838810&quot;</td><td>0</td><td>&quot;.&quot;</td><td>181586</td><td>181802</td><td>6218147</td><td>&quot;DNase-only&quot;</td><td>&quot;Missing-data/P…</td></tr><tr><td>&quot;chr17&quot;</td><td>182023</td><td>182199</td>
<td>&quot;EH38E1838811&quot;</td><td>0</td><td>&quot;.&quot;</td><td>182023</td><td>182199</td><td>225225225</td><td>&quot;Low-DNase&quot;</td><td>&quot;Missing-data/P…</td></tr><tr><td>&quot;chr17&quot;</td><td>182262</td><td>182430</td><td>&quot;EH38E1838812&quot;</td><td>0</td><td>&quot;.&quot;</td><td>182262</td><td>182430</td><td>225225225</td><td>&quot;Low-DNase&quot;</td><td>&quot;Missing-data/P…</td></tr><tr><td>&quot;chr17&quot;</td><td>182459</td><td>182806</td><td>&quot;EH38E1838813&quot;</td><td>0</td><td>&quot;.&quot;</td><td>182459</td><td>182806</td><td>225225225</td><td>&quot;Low-DNase&quot;</td><td>&quot;Missing-data/P…</td></tr><tr><td>&quot;chr17&quot;</td><td>183025</td><td>183365</td><td>&quot;EH38E1838814&quot;</td><td>0</td><td>&quot;.&quot;</td><td>183025</td><td>183365</td><td>225225225</td><td>&quot;Low-DNase&quot;</td><td>&quot;Missing-data/P…</td></tr><tr><td>&quot;chr17&quot;</td><td>185648</td><td>185892</td><td>&quot;EH38E1838816&quot;</td><td>0</td><td>&quot;.&quot;</td><td>185648</td><td>185892</td><td>225225225</td><td>&quot;Low-DNase&quot;</td><td>&quot;Missing-data/P…</td></tr><tr><td>&quot;chr17&quot;</td><td>186808</td><td>187143</td><td>&quot;EH38E1838818&quot;</td><td>0</td><td>&quot;.&quot;</td><td>186808</td><td>187143</td><td>225225225</td><td>&quot;Low-DNase&quot;</td><td>&quot;Missing-data/P…</td></tr><tr><td>&quot;chr17&quot;</td><td>193945</td><td>194105</td><td>&quot;EH38E1838821&quot;</td><td>0</td><td>&quot;.&quot;</td><td>193945</td><td>194105</td><td>225225225</td><td>&quot;Low-DNase&quot;</td><td>&quot;Missing-data/P…</td></tr><tr><td>&quot;chr17&quot;</td><td>194418</td><td>194762</td><td>&quot;EH38E1838822&quot;</td><td>0</td><td>&quot;.&quot;</td><td>194418</td><td>194762</td><td>225225225</td><td>&quot;Low-DNase&quot;</td><td>&quot;Missing-data/P…</td></tr><tr><td>&quot;chr17&quot;</td><td>195326</td><td>195486</td><td>&quot;EH38E1838824&quot;</td><td>0</td><td>&quot;.&quot;</td><td>195326</td><td>195486</td><
td>225225225</td><td>&quot;Low-DNase&quot;</td><td>&quot;Missing-data/P…</td></tr><tr><td>&quot;chr17&quot;</td><td>199123</td><td>199461</td><td>&quot;EH38E1838826&quot;</td><td>0</td><td>&quot;.&quot;</td><td>199123</td><td>199461</td><td>225225225</td><td>&quot;Low-DNase&quot;</td><td>&quot;Missing-data/P…</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (27, 11)\n",
"┌───────┬────────┬────────┬──────────────┬───┬──────────┬───────────┬────────────┬─────────────────┐\n",
"│ chrom ┆ start ┆ end ┆ name ┆ … ┆ thickEnd ┆ reserved ┆ ccre ┆ classification │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ cat ┆ u32 ┆ u32 ┆ str ┆ ┆ u32 ┆ u32 ┆ str ┆ str │\n",
"╞═══════╪════════╪════════╪══════════════╪═══╪══════════╪═══════════╪════════════╪═════════════════╡\n",
"│ chr17 ┆ 118343 ┆ 118595 ┆ EH38E1838787 ┆ … ┆ 118595 ┆ 225225225 ┆ Low-DNase ┆ Missing-data/Pa │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ rtial-classific │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ at… │\n",
"│ chr17 ┆ 142029 ┆ 142378 ┆ EH38E1838788 ┆ … ┆ 142378 ┆ 225225225 ┆ Low-DNase ┆ Missing-data/Pa │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ rtial-classific │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ at… │\n",
"│ chr17 ┆ 143819 ┆ 144169 ┆ EH38E1838789 ┆ … ┆ 144169 ┆ 6218147 ┆ DNase-only ┆ Missing-data/Pa │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ rtial-classific │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ at… │\n",
"│ chr17 ┆ 156467 ┆ 156703 ┆ EH38E1838794 ┆ … ┆ 156703 ┆ 225225225 ┆ Low-DNase ┆ Missing-data/Pa │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ rtial-classific │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ at… │\n",
"│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
"│ chr17 ┆ 193945 ┆ 194105 ┆ EH38E1838821 ┆ … ┆ 194105 ┆ 225225225 ┆ Low-DNase ┆ Missing-data/Pa │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ rtial-classific │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ at… │\n",
"│ chr17 ┆ 194418 ┆ 194762 ┆ EH38E1838822 ┆ … ┆ 194762 ┆ 225225225 ┆ Low-DNase ┆ Missing-data/Pa │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ rtial-classific │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ at… │\n",
"│ chr17 ┆ 195326 ┆ 195486 ┆ EH38E1838824 ┆ … ┆ 195486 ┆ 225225225 ┆ Low-DNase ┆ Missing-data/Pa │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ rtial-classific │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ at… │\n",
"│ chr17 ┆ 199123 ┆ 199461 ┆ EH38E1838826 ┆ … ┆ 199461 ┆ 225225225 ┆ Low-DNase ┆ Missing-data/Pa │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ rtial-classific │\n",
"│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ at… │\n",
"└───────┴────────┴────────┴──────────────┴───┴──────────┴───────────┴────────────┴─────────────────┘"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ipc = ox.read_bigbed(g)\n",
"pl.read_ipc(ipc)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "oxbow",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment