Skip to content

Instantly share code, notes, and snippets.

@gfudenberg
Created March 24, 2021 17:39
Show Gist options
  • Save gfudenberg/3b016cdc20b3e482cdd9411d3542f823 to your computer and use it in GitHub Desktop.
Save gfudenberg/3b016cdc20b3e482cdd9411d3542f823 to your computer and use it in GitHub Desktop.
draft_align_tables.ipynb
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
{
"cells": [
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>chrom</th>\n",
" <th>start</th>\n",
" <th>end</th>\n",
" <th>strand2</th>\n",
" <th>animal</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>chr1</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>+</td>\n",
" <td>cat</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>chr1</td>\n",
" <td>8</td>\n",
" <td>10</td>\n",
" <td>+</td>\n",
" <td>dog</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>chrX</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>+</td>\n",
" <td>cat</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" chrom start end strand2 animal\n",
"22 chr1 0 6 + cat\n",
"11 chr1 8 10 + dog\n",
"16 chrX 1 8 + cat"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import bioframe\n",
"import pyranges as pr\n",
"import numpy as np\n",
"from io import StringIO\n",
"from bioframe.ops import _get_default_colnames, _verify_columns\n",
"\n",
"# Two toy interval tables that list the same (chrom, start, end) rows.\n",
"df1 = pd.DataFrame(\n",
"    [\n",
"        [\"chr1\", 0, 6, \"+\"],\n",
"        [\"chr1\", 8, 10, \"-\"],\n",
"        [\"chrX\", 1, 8, \"+\"],\n",
"    ],\n",
"    columns=[\"chrom\", \"start\", \"end\", \"strand\"],\n",
")\n",
"\n",
"# df2 gets a non-default index so the alignment helpers below have\n",
"# something to reset. The index is passed at construction time instead of\n",
"# calling set_index(..., inplace=True), so re-running the cell is idempotent.\n",
"df2 = pd.DataFrame(\n",
"    [\n",
"        [\"chr1\", 0, 6, \"+\", \"cat\"],\n",
"        [\"chr1\", 8, 10, \"+\", \"dog\"],\n",
"        [\"chrX\", 1, 8, \"+\", \"cat\"],\n",
"    ],\n",
"    columns=[\"chrom\", \"start\", \"end\", \"strand2\", \"animal\"],\n",
"    index=[22, 11, 16],\n",
")\n",
"df2"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def align_tables_pd(df1, df2, cols1=None, cols2=None):\n",
"    \"\"\"Check that two tables hold the same intervals row-by-row and align them.\n",
"\n",
"    The three coordinate columns of each table are compared positionally\n",
"    with ``pd.testing.assert_frame_equal`` after both indexes are reset.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df1, df2 : pandas.DataFrame\n",
"        Tables to compare. Neither input is modified.\n",
"    cols1, cols2 : sequence of three column names, optional\n",
"        Coordinate columns of ``df1`` / ``df2``. When None, the names\n",
"        returned by ``_get_default_colnames()`` are used.\n",
"\n",
"    Returns\n",
"    -------\n",
"    (pandas.DataFrame, pandas.DataFrame)\n",
"        ``df1`` and ``df2`` with their indexes replaced by a default\n",
"        RangeIndex.\n",
"\n",
"    Raises\n",
"    ------\n",
"    AssertionError\n",
"        If the coordinate columns differ (as judged by\n",
"        ``assert_frame_equal`` with its default settings).\n",
"    \"\"\"\n",
"    # Bug fix: the original fell back to cols2 here, silently ignoring cols1.\n",
"    ck1, sk1, ek1 = _get_default_colnames() if cols1 is None else cols1\n",
"    ck2, sk2, ek2 = _get_default_colnames() if cols2 is None else cols2\n",
"\n",
"    # Work on re-indexed copies so a failed check cannot leave the caller's\n",
"    # frames half-modified (the original reset the indexes in place first).\n",
"    df1 = df1.reset_index(drop=True)\n",
"    df2 = df2.reset_index(drop=True)\n",
"\n",
"    coords1 = df1[[ck1, sk1, ek1]]\n",
"    # Give df2's coordinates df1's labels; otherwise assert_frame_equal fails\n",
"    # on the column names alone whenever cols1 != cols2.\n",
"    coords2 = df2[[ck2, sk2, ek2]].rename(\n",
"        columns=dict(zip((ck2, sk2, ek2), (ck1, sk1, ek1)))\n",
"    )\n",
"    pd.testing.assert_frame_equal(coords1, coords2)\n",
"    return df1, df2"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def align_tables_by_values(df1, df2, cols1=None, cols2=None):\n",
"    \"\"\"Check that two tables hold the same intervals by value and align them.\n",
"\n",
"    The three coordinate columns of ``df1`` are compared element-wise with\n",
"    the raw values of the coordinate columns of ``df2``, so index labels and\n",
"    column names do not take part in the comparison. Positions that are\n",
"    null in both tables count as equal.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df1, df2 : pandas.DataFrame\n",
"        Tables to compare. Neither input is modified.\n",
"    cols1, cols2 : sequence of three column names, optional\n",
"        Coordinate columns of ``df1`` / ``df2``. When None, the names\n",
"        returned by ``_get_default_colnames()`` are used.\n",
"\n",
"    Returns\n",
"    -------\n",
"    (pandas.DataFrame, pandas.DataFrame)\n",
"        ``df1`` and ``df2`` with their indexes replaced by a default\n",
"        RangeIndex.\n",
"\n",
"    Raises\n",
"    ------\n",
"    AssertionError\n",
"        If the tables differ in length or in any coordinate value.\n",
"    \"\"\"\n",
"    # Bug fix: the original fell back to cols2 here, silently ignoring cols1.\n",
"    ck1, sk1, ek1 = _get_default_colnames() if cols1 is None else cols1\n",
"    ck2, sk2, ek2 = _get_default_colnames() if cols2 is None else cols2\n",
"\n",
"    coords1 = df1[[ck1, sk1, ek1]]\n",
"    # Bug fix: df2 must be indexed with its own column names (the original\n",
"    # used ck1/sk1/ek1 for both tables, so cols2 had no effect).\n",
"    coords2 = df2[[ck2, sk2, ek2]]\n",
"\n",
"    # A length mismatch would otherwise surface as an opaque ValueError\n",
"    # from the element-wise comparison below.\n",
"    if len(coords1) != len(coords2):\n",
"        raise AssertionError('dataframes not equivalent')\n",
"\n",
"    both_null = coords1.isnull().values & coords2.isnull().values\n",
"    matches = coords1.eq(coords2.values) | both_null\n",
"    if not matches.all().all():\n",
"        raise AssertionError('dataframes not equivalent')\n",
"\n",
"    # Return re-indexed copies instead of resetting the inputs in place.\n",
"    return df1.reset_index(drop=True), df2.reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"( chrom start end strand\n",
" 0 chr1 0 6 +\n",
" 1 chr1 8 10 -\n",
" 2 chrX 1 8 +,\n",
" chrom start end strand2 animal\n",
" 0 chr1 0 6 + cat\n",
" 1 chr1 8 10 + dog\n",
" 2 chrX 1 8 + cat)"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The original call targeted `align_tables_np`, which is not defined in this\n",
"# notebook and raises NameError on a fresh kernel. NOTE(review): presumably\n",
"# that was an earlier name of `align_tables_by_values` -- confirm.\n",
"df1, df2 = align_tables_by_values(df1, df2)\n",
"df1, df2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment