-
-
Save gfudenberg/3b016cdc20b3e482cdd9411d3542f823 to your computer and use it in GitHub Desktop.
draft_align_tables.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>chrom</th>\n", | |
" <th>start</th>\n", | |
" <th>end</th>\n", | |
" <th>strand2</th>\n", | |
" <th>animal</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>22</th>\n", | |
" <td>chr1</td>\n", | |
" <td>0</td>\n", | |
" <td>6</td>\n", | |
" <td>+</td>\n", | |
" <td>cat</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>chr1</td>\n", | |
" <td>8</td>\n", | |
" <td>10</td>\n", | |
" <td>+</td>\n", | |
" <td>dog</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>chrX</td>\n", | |
" <td>1</td>\n", | |
" <td>8</td>\n", | |
" <td>+</td>\n", | |
" <td>cat</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" chrom start end strand2 animal\n", | |
"22 chr1 0 6 + cat\n", | |
"11 chr1 8 10 + dog\n", | |
"16 chrX 1 8 + cat" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import pandas as pd\n", | |
"import bioframe\n", | |
"import pyranges as pr\n", | |
"import numpy as np\n", | |
"from io import StringIO\n", | |
"from bioframe.ops import _get_default_colnames, _verify_columns\n", | |
"\n", | |
# Example intervals table with a strand column and the default RangeIndex.
df1 = pd.DataFrame(
    [
        ["chr1", 0, 6, "+"],
        ["chr1", 8, 10, "-"],
        ["chrX", 1, 8, "+"],
    ],
    columns=["chrom", "start", "end", "strand"],
)

# Same three intervals with different extra columns. The index (22, 11, 16)
# is set at construction time rather than via `set_index(..., inplace=True)`,
# so re-running this cell always yields the same frame.
df2 = pd.DataFrame(
    [
        ["chr1", 0, 6, "+", "cat"],
        ["chr1", 8, 10, "+", "dog"],
        ["chrX", 1, 8, "+", "cat"],
    ],
    columns=["chrom", "start", "end", "strand2", "animal"],
    index=pd.Index([22, 11, 16]),
)
df2
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
def align_tables_pd(df1, df2, cols1=None, cols2=None):
    """Verify that two tables list the same intervals, then reset both indexes.

    The three interval columns of ``df1`` and ``df2`` are compared row by row
    (by position, ignoring the existing index) with
    ``pd.testing.assert_frame_equal``.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Tables to check. Both are modified in place on success.
    cols1, cols2 : sequence of three column names, optional
        Names of the three interval columns in ``df1`` and ``df2``
        respectively. When ``None``, the names returned by
        ``_get_default_colnames()`` are used.

    Returns
    -------
    (df1, df2)
        The same two objects that were passed in, each with its index reset
        to the default integer index.

    Raises
    ------
    AssertionError
        If the interval columns of the two tables are not equal under the
        default settings of ``assert_frame_equal``.
    """
    # Each table resolves its own column names (the original read `cols2`
    # for both, silently ignoring `cols1`).
    ck1, sk1, ek1 = _get_default_colnames() if cols1 is None else cols1
    ck2, sk2, ek2 = _get_default_colnames() if cols2 is None else cols2

    # Compare positional copies so that a failed check leaves the callers'
    # frames untouched.
    left = df1[[ck1, sk1, ek1]].reset_index(drop=True)
    right = df2[[ck2, sk2, ek2]].reset_index(drop=True)
    # assert_frame_equal also compares column labels; relabel so tables that
    # use different interval column names can still be compared.
    right.columns = left.columns
    pd.testing.assert_frame_equal(left, right)

    df1.reset_index(inplace=True, drop=True)
    df2.reset_index(inplace=True, drop=True)
    return df1, df2
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
def align_tables_by_values(df1, df2, cols1=None, cols2=None):
    """Verify two tables list the same intervals by value, then reset indexes.

    The three interval columns are compared element-wise by position, using
    the raw values of ``df2`` so that neither index labels nor column labels
    take part in the comparison. Two cells also count as matching when both
    are null.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Tables to check. Both are modified in place on success.
    cols1, cols2 : sequence of three column names, optional
        Names of the three interval columns in ``df1`` and ``df2``
        respectively. When ``None``, the names returned by
        ``_get_default_colnames()`` are used.

    Returns
    -------
    (df1, df2)
        The same two objects that were passed in, each with its index reset
        to the default integer index.

    Raises
    ------
    AssertionError
        If the tables have different numbers of rows or any pair of interval
        cells differs.
    """
    # Each table resolves its own column names (the original read `cols2`
    # for both, and then indexed df2 with df1's names).
    ck1, sk1, ek1 = _get_default_colnames() if cols1 is None else cols1
    ck2, sk2, ek2 = _get_default_colnames() if cols2 is None else cols2

    left = df1[[ck1, sk1, ek1]]
    right = df2[[ck2, sk2, ek2]]

    # A row-count mismatch would otherwise surface as a shape error from
    # `.eq`; report it the same way as any other non-equivalence.
    if len(left) != len(right):
        raise AssertionError('dataframes not equivalent')

    same_value = left.eq(right.values)
    # NaN != NaN, so positions where both sides are null are accepted explicitly.
    both_null = left.isnull().values & right.isnull().values
    if not (same_value | both_null).all().all():
        raise AssertionError('dataframes not equivalent')

    df1.reset_index(inplace=True, drop=True)
    df2.reset_index(inplace=True, drop=True)
    return df1, df2
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"( chrom start end strand\n", | |
" 0 chr1 0 6 +\n", | |
" 1 chr1 8 10 -\n", | |
" 2 chrX 1 8 +,\n", | |
" chrom start end strand2 animal\n", | |
" 0 chr1 0 6 + cat\n", | |
" 1 chr1 8 10 + dog\n", | |
" 2 chrX 1 8 + cat)" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
# `align_tables_np` is not defined anywhere in this notebook, so the cell
# fails on a fresh kernel; the values-based helper is `align_tables_by_values`.
df1, df2 = align_tables_by_values(df1, df2)
df1, df2
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment