Skip to content

Instantly share code, notes, and snippets.

@SilasK
Created October 21, 2021 13:02
Show Gist options
  • Save SilasK/5932a6887ee4d2520b5a59cec06d09b7 to your computer and use it in GitHub Desktop.
Save SilasK/5932a6887ee4d2520b5a59cec06d09b7 to your computer and use it in GitHub Desktop.
Linking Atlas genes to contigs and bins
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"id": "opposite-model",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pylab as plt\n"
]
},
{
"cell_type": "code",
"execution_count": 303,
"id": "dominant-monitoring",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"check if file contigs2bins exists: True\n",
"check if file contigs2mags exists: True\n",
"check if file allbins2genome exists: True\n",
"check if file old2newID exists: True\n",
"check if file orf2gene exists: True\n",
"check if file taxonomy exists: True\n",
"check if file egg_Nog exists: True\n"
]
}
],
"source": [
"# Root of the ATLAS working directory.\n",
"# Set the ATLAS_WORKING_DIR environment variable to point the notebook at another\n",
"# run without editing this cell; the path below is only the fallback.\n",
"atlas_dir = os.environ.get(\n",
"    \"ATLAS_WORKING_DIR\",\n",
"    \"/Users/silas/Documents/scripts_baobab/scratch/Debug_atlas/WD/\",\n",
")\n",
"\n",
"# Input tables used in this notebook, given relative to atlas_dir.\n",
"files = dict(\n",
"    contigs2bins=\"genomes/clustering/all_contigs2bins.tsv.gz\",\n",
"    contigs2mags=\"genomes/clustering/contig2genome.tsv\",\n",
"    allbins2genome=\"genomes/clustering/allbins2genome.tsv\",\n",
"    old2newID=\"genomes/clustering/old2newID.tsv\",\n",
"    orf2gene=\"Genecatalog/clustering/orf2gene.tsv.gz\",\n",
"    taxonomy=\"genomes/taxonomy/gtdb_taxonomy.tsv\",\n",
"    egg_Nog=\"Genecatalog/annotations/eggNog.tsv.gz\",\n",
")\n",
"\n",
"# Turn every relative path into a path below atlas_dir.\n",
"files = {key: os.path.join(atlas_dir, relative_path) for key, relative_path in files.items()}\n",
"\n",
"# Report, per input table, whether the file is present on disk.\n",
"for key, path in files.items():\n",
"    print(f\"check if file {key} exists: {os.path.exists(path)}\")"
]
},
{
"cell_type": "code",
"execution_count": 112,
"id": "frank-sword",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ORF</th>\n",
" <th>Gene</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>sample1_1_30</td>\n",
" <td>Gene0001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>sample2_172_4</td>\n",
" <td>Gene0001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>sample1_2_33</td>\n",
" <td>Gene0002</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>sample2_72_9</td>\n",
" <td>Gene0002</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>sample1_3_8</td>\n",
" <td>Gene0003</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ORF Gene\n",
"0 sample1_1_30 Gene0001\n",
"1 sample2_172_4 Gene0001\n",
"2 sample1_2_33 Gene0002\n",
"3 sample2_72_9 Gene0002\n",
"4 sample1_3_8 Gene0003"
]
},
"execution_count": 112,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the ORF -> gene table (columns ORF and Gene) and preview the first rows.\n",
"orfs = pd.read_csv(files['orf2gene'], sep='\\t')\n",
"orfs.head()"
]
},
{
"cell_type": "code",
"execution_count": 113,
"id": "regulated-description",
"metadata": {},
"outputs": [],
"source": [
"# Split every ORF name at its last underscore: the prefix is the contig, the suffix is\n",
"# the position of the ORF on that contig (e.g. sample1_1_30 -> sample1_1 and 30).\n",
"orf_name_parts = orfs['ORF'].str.rsplit('_', n=1, expand=True)\n",
"\n",
"# Numeric gene id: the gene name without its first four characters ('Gene').\n",
"orfs['GeneId'] = orfs['Gene'].str.slice(4).astype(int)\n",
"orfs['Contig'] = orf_name_parts[0].astype('category')\n",
"# NOTE(review): uint16 tops out at 65535 - confirm no contig carries more ORFs than that.\n",
"orfs['GenePosition'] = orf_name_parts[1].astype(np.uint16)\n",
"\n",
"# Remove the two text columns the new columns were derived from.\n",
"del orfs['ORF'], orfs['Gene']\n"
]
},
{
"cell_type": "code",
"execution_count": 114,
"id": "alone-employer",
"metadata": {},
"outputs": [],
"source": [
"# Index the ORFs by (Contig, GenePosition); both stay available as regular columns.\n",
"orfs.index = pd.MultiIndex.from_arrays([orfs['Contig'], orfs['GenePosition']])\n",
"#orfs.sort_index(inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "overhead-modern",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 181,
"id": "proprietary-pendant",
"metadata": {},
"outputs": [],
"source": [
"# For every ORF, the index entry its right-hand neighbour would have: same contig, next position.\n",
"candidate_neighbour_index = pd.MultiIndex.from_arrays([orfs['Contig'], orfs['GenePosition'] + 1])\n",
"\n",
"# Keep only the ORFs whose right-hand neighbour is actually present in the table.\n",
"valid_neighbour = candidate_neighbour_index.isin(orfs.index)\n",
"left_neighbour_index = orfs.index[valid_neighbour]\n",
"right_neighbour_index = candidate_neighbour_index[valid_neighbour]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "presidential-hollywood",
"metadata": {
"tags": []
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 184,
"id": "skilled-principal",
"metadata": {},
"outputs": [],
"source": [
"# Rows of the left and right ORF of every adjacent pair, in matching order.\n",
"left_orfs = orfs.loc[left_neighbour_index]\n",
"right_orfs = orfs.loc[right_neighbour_index]\n",
"\n",
"# One row per adjacent ORF pair: the two gene ids and the contig they sit on.\n",
"edgelist = pd.DataFrame({\n",
"    'LeftGene': left_orfs['GeneId'].values,\n",
"    'RightGene': right_orfs['GeneId'].values,\n",
"    'Contig': left_orfs['Contig'].values,\n",
"})\n",
"\n",
"# Number of times each (LeftGene, RightGene) pair occurs in the edge list.\n",
"support = edgelist[['LeftGene', 'RightGene']].value_counts().rename('N_connections')"
]
},
{
"cell_type": "code",
"execution_count": 185,
"id": "protective-danger",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>LeftGene</th>\n",
" <th>RightGene</th>\n",
" <th>Contig</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>3940</td>\n",
" <td>sample1_1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>689</td>\n",
" <td>sample2_172</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>3366</td>\n",
" <td>sample1_2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>3367</td>\n",
" <td>sample1_3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>3</td>\n",
" <td>3367</td>\n",
" <td>sample2_36</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" LeftGene RightGene Contig\n",
"0 1 3940 sample1_1\n",
"1 1 689 sample2_172\n",
"2 2 3366 sample1_2\n",
"3 3 3367 sample1_3\n",
"4 3 3367 sample2_36"
]
},
"execution_count": 185,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"edgelist.head()"
]
},
{
"cell_type": "code",
"execution_count": 188,
"id": "resistant-punch",
"metadata": {},
"outputs": [],
"source": [
"Gene_of_interest = 'Gene3367'\n",
"# Numeric id used in the edge list: the gene name without its 'Gene' prefix.\n",
"GeneID_of_interest = int(Gene_of_interest[len('Gene'):])\n",
"\n",
"# All edges that have the gene of interest on either side.\n",
"touches_gene_of_interest = (edgelist['LeftGene'] == GeneID_of_interest) | (edgelist['RightGene'] == GeneID_of_interest)\n",
"Gene_Neighbourhood = edgelist[touches_gene_of_interest]"
]
},
{
"cell_type": "code",
"execution_count": 189,
"id": "deadly-locking",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>LeftGene</th>\n",
" <th>RightGene</th>\n",
" <th>Contig</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>3367</td>\n",
" <td>sample1_3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>3</td>\n",
" <td>3367</td>\n",
" <td>sample2_36</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3643</th>\n",
" <td>3367</td>\n",
" <td>2025</td>\n",
" <td>sample1_3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3644</th>\n",
" <td>3367</td>\n",
" <td>2025</td>\n",
" <td>sample2_36</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" LeftGene RightGene Contig\n",
"3 3 3367 sample1_3\n",
"4 3 3367 sample2_36\n",
"3643 3367 2025 sample1_3\n",
"3644 3367 2025 sample2_36"
]
},
"execution_count": 189,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Gene_Neighbourhood"
]
},
{
"cell_type": "markdown",
"id": "million-enlargement",
"metadata": {},
"source": [
"## Graph Plot \n",
"Plot gene neighbourhood"
]
},
{
"cell_type": "code",
"execution_count": 267,
"id": "otherwise-warren",
"metadata": {},
"outputs": [],
"source": [
"import networkx as nx\n",
"\n",
"# Multigraph of genes: one edge per adjacent ORF pair, tagged with the contig it was seen on.\n",
"G = nx.from_pandas_edgelist(\n",
"    edgelist,\n",
"    source='LeftGene',\n",
"    target='RightGene',\n",
"    edge_attr='Contig',\n",
"    create_using=nx.MultiGraph,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 293,
"id": "atomic-attraction",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 299,
"id": "brutal-announcement",
"metadata": {},
"outputs": [],
"source": [
"# Maximum number of edges between the gene of interest and any gene kept in the plot.\n",
"neighborhood_size = 5\n",
"\n",
"# Genes reachable from the gene of interest within neighborhood_size steps.\n",
"distance_from_gene = nx.single_source_shortest_path_length(G, GeneID_of_interest, cutoff=neighborhood_size)\n",
"Gneighborhood = G.subgraph(list(distance_from_gene))"
]
},
{
"cell_type": "code",
"execution_count": 300,
"id": "likely-norfolk",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Legend contigs: {'red': 'sample1_3', 'blue': 'sample2_36'}\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"execution_count": 300,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import networkx as nx\n",
"import matplotlib.pyplot as plt\n",
"from IPython.display import Image\n",
"\n",
"def pydot_plot_graph(Graph, higlight_node=None, filename='multi.png'):\n",
"    \"\"\"Draw a graph with pydot, coloring every edge by its contig, and save it as PNG.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    Graph : networkx graph\n",
"        Every edge must carry a 'Contig' attribute. Colors are written to a copy,\n",
"        the graph passed in is not changed.\n",
"    higlight_node : optional\n",
"        Node to draw as a red box. If it is not part of the graph a message is\n",
"        printed and nothing is highlighted.\n",
"    filename : str, default 'multi.png'\n",
"        Path of the PNG file to write.\n",
"\n",
"    Returns\n",
"    -------\n",
"    None. The contig legend is printed and the image is written to `filename`.\n",
"    \"\"\"\n",
"\n",
"    G = Graph.copy()\n",
"\n",
"    Colors = ['red', 'blue', 'black', 'orange', 'green', 'yellow']\n",
"    contig_colors = {}  # contig -> edge color, in order of first appearance\n",
"\n",
"    for _, _, attributes in G.edges(data=True):\n",
"        contig = attributes['Contig']\n",
"        if contig not in contig_colors:\n",
"            # Wrap around the palette: indexing Colors directly raised IndexError\n",
"            # as soon as the graph spanned more contigs than there are colors.\n",
"            contig_colors[contig] = Colors[len(contig_colors) % len(Colors)]\n",
"        attributes['color'] = contig_colors[contig]\n",
"\n",
"    if len(contig_colors) <= len(Colors):\n",
"        print(\"Legend contigs: \", dict(zip(Colors, contig_colors)))\n",
"    else:\n",
"        # Colors repeat, so a color -> contig legend would drop contigs; list contig -> color.\n",
"        print(\"Legend contigs (colors are reused): \", contig_colors)\n",
"\n",
"    p = nx.drawing.nx_pydot.to_pydot(G)\n",
"    if higlight_node is not None:\n",
"        # get_node returns a list, which is empty when no node has that name.\n",
"        matching_nodes = p.get_node(str(higlight_node))\n",
"        if matching_nodes:\n",
"            node_of_interest = matching_nodes[0]\n",
"            node_of_interest.set_color('red')\n",
"            node_of_interest.set_shape('box')\n",
"        else:\n",
"            print(f\"Node {higlight_node} is not in the graph, nothing highlighted.\")\n",
"\n",
"    p.write_png(filename)\n",
" \n",
" \n",
"pydot_plot_graph(Gneighborhood, GeneID_of_interest)\n",
"Image(filename='multi.png')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "honey-provision",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "shaped-choice",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 286,
"id": "prescription-canvas",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "incorporated-technician",
"metadata": {},
"source": [
"## Contigs and Bins\n",
"\n",
"Needs the file \"genomes/clustering/all_contigs2bins.tsv.gz\" \n",
"To re-create it run\n",
"`atlas run None \"genomes/clustering/all_contigs2bins.tsv.gz\"`"
]
},
{
"cell_type": "code",
"execution_count": 334,
"id": "sitting-opposition",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Bin</th>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>sample2_0</th>\n",
" <td>sample2_maxbin_2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sample2_1</th>\n",
" <td>sample2_maxbin_2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sample2_2</th>\n",
" <td>sample2_maxbin_2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sample2_3</th>\n",
" <td>sample2_maxbin_2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sample2_4</th>\n",
" <td>sample2_maxbin_2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Bin\n",
"0 \n",
"sample2_0 sample2_maxbin_2\n",
"sample2_1 sample2_maxbin_2\n",
"sample2_2 sample2_maxbin_2\n",
"sample2_3 sample2_maxbin_2\n",
"sample2_4 sample2_maxbin_2"
]
},
"execution_count": 334,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Headerless contig -> bin table: the first column becomes the index, the remaining one is the bin.\n",
"contigs = pd.read_csv(files['contigs2bins'], sep='\\t', index_col=0, header=None)\n",
"contigs.columns = ['Bin']\n",
"contigs.head()"
]
},
{
"cell_type": "code",
"execution_count": 316,
"id": "accessible-keyboard",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>MAG</th>\n",
" <th>Domain</th>\n",
" <th>phylum</th>\n",
" <th>class</th>\n",
" <th>order</th>\n",
" <th>family</th>\n",
" <th>genus</th>\n",
" <th>species</th>\n",
" <th>Label</th>\n",
" </tr>\n",
" <tr>\n",
" <th>genome</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>sample1_maxbin_2</th>\n",
" <td>MAG1</td>\n",
" <td>Bacteria</td>\n",
" <td>Firmicutes</td>\n",
" <td>Bacilli</td>\n",
" <td>Mycoplasmatales</td>\n",
" <td>Metamycoplasmataceae</td>\n",
" <td>Mesomycoplasma</td>\n",
" <td>Mesomycoplasma hyorhinis</td>\n",
" <td>Mesomycoplasma hyorhinis</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sample2_maxbin_3</th>\n",
" <td>MAG1</td>\n",
" <td>Bacteria</td>\n",
" <td>Firmicutes</td>\n",
" <td>Bacilli</td>\n",
" <td>Mycoplasmatales</td>\n",
" <td>Metamycoplasmataceae</td>\n",
" <td>Mesomycoplasma</td>\n",
" <td>Mesomycoplasma hyorhinis</td>\n",
" <td>Mesomycoplasma hyorhinis</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sample2_maxbin_2</th>\n",
" <td>MAG2</td>\n",
" <td>Bacteria</td>\n",
" <td>Firmicutes</td>\n",
" <td>Bacilli</td>\n",
" <td>Lactobacillales</td>\n",
" <td>Streptococcaceae</td>\n",
" <td>Streptococcus</td>\n",
" <td>Streptococcus thermophilus</td>\n",
" <td>Streptococcus thermophilus</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sample2_maxbin_1</th>\n",
" <td>MAG3</td>\n",
" <td>Bacteria</td>\n",
" <td>Firmicutes</td>\n",
" <td>Bacilli</td>\n",
" <td>Mycoplasmatales</td>\n",
" <td>Mycoplasmoidaceae</td>\n",
" <td>Ureaplasma</td>\n",
" <td>Ureaplasma urealyticum</td>\n",
" <td>Ureaplasma urealyticum</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" MAG Domain phylum class order \\\n",
"genome \n",
"sample1_maxbin_2 MAG1 Bacteria Firmicutes Bacilli Mycoplasmatales \n",
"sample2_maxbin_3 MAG1 Bacteria Firmicutes Bacilli Mycoplasmatales \n",
"sample2_maxbin_2 MAG2 Bacteria Firmicutes Bacilli Lactobacillales \n",
"sample2_maxbin_1 MAG3 Bacteria Firmicutes Bacilli Mycoplasmatales \n",
"\n",
" family genus \\\n",
"genome \n",
"sample1_maxbin_2 Metamycoplasmataceae Mesomycoplasma \n",
"sample2_maxbin_3 Metamycoplasmataceae Mesomycoplasma \n",
"sample2_maxbin_2 Streptococcaceae Streptococcus \n",
"sample2_maxbin_1 Mycoplasmoidaceae Ureaplasma \n",
"\n",
" species Label \n",
"genome \n",
"sample1_maxbin_2 Mesomycoplasma hyorhinis Mesomycoplasma hyorhinis \n",
"sample2_maxbin_3 Mesomycoplasma hyorhinis Mesomycoplasma hyorhinis \n",
"sample2_maxbin_2 Streptococcus thermophilus Streptococcus thermophilus \n",
"sample2_maxbin_1 Ureaplasma urealyticum Ureaplasma urealyticum "
]
},
"execution_count": 316,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bins = pd.read_csv(files['allbins2genome'], sep='\\t', index_col=0)\n",
"Tax = pd.read_csv(files['taxonomy'], sep='\\t', index_col=0)\n",
"\n",
"# Create a label also for unnamed species: forward-filling along each row puts the last\n",
"# non-missing entry into the species column, and the row's index value is appended to it.\n",
"species_missing = Tax['species'].isnull()\n",
"Tax['Label'] = Tax.ffill(axis=1)['species']\n",
"Tax.loc[species_missing, 'Label'] += ' ' + Tax.index[species_missing]\n",
"\n",
"# Attach the taxonomy of its MAG to every bin.\n",
"bins = bins.join(Tax, on='MAG')\n",
"\n",
"bins.head()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ahead-defendant",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 336,
"id": "japanese-reputation",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Bin</th>\n",
" <th>MAG</th>\n",
" <th>Domain</th>\n",
" <th>phylum</th>\n",
" <th>class</th>\n",
" <th>order</th>\n",
" <th>family</th>\n",
" <th>genus</th>\n",
" <th>species</th>\n",
" <th>Label</th>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>sample1_3</th>\n",
" <td>sample1_maxbin_2</td>\n",
" <td>MAG1</td>\n",
" <td>Bacteria</td>\n",
" <td>Firmicutes</td>\n",
" <td>Bacilli</td>\n",
" <td>Mycoplasmatales</td>\n",
" <td>Metamycoplasmataceae</td>\n",
" <td>Mesomycoplasma</td>\n",
" <td>Mesomycoplasma hyorhinis</td>\n",
" <td>Mesomycoplasma hyorhinis</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sample2_36</th>\n",
" <td>sample2_maxbin_3</td>\n",
" <td>MAG1</td>\n",
" <td>Bacteria</td>\n",
" <td>Firmicutes</td>\n",
" <td>Bacilli</td>\n",
" <td>Mycoplasmatales</td>\n",
" <td>Metamycoplasmataceae</td>\n",
" <td>Mesomycoplasma</td>\n",
" <td>Mesomycoplasma hyorhinis</td>\n",
" <td>Mesomycoplasma hyorhinis</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Bin MAG Domain phylum class \\\n",
"0 \n",
"sample1_3 sample1_maxbin_2 MAG1 Bacteria Firmicutes Bacilli \n",
"sample2_36 sample2_maxbin_3 MAG1 Bacteria Firmicutes Bacilli \n",
"\n",
" order family genus \\\n",
"0 \n",
"sample1_3 Mycoplasmatales Metamycoplasmataceae Mesomycoplasma \n",
"sample2_36 Mycoplasmatales Metamycoplasmataceae Mesomycoplasma \n",
"\n",
" species Label \n",
"0 \n",
"sample1_3 Mesomycoplasma hyorhinis Mesomycoplasma hyorhinis \n",
"sample2_36 Mesomycoplasma hyorhinis Mesomycoplasma hyorhinis "
]
},
"execution_count": 336,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Contig and Bin information for gene of interest\n",
"\n",
"# Distinct contigs carrying an ORF of the gene of interest.\n",
"orfs_of_gene_of_interest = orfs[orfs['GeneId'] == GeneID_of_interest]\n",
"contigs_from_gene_of_interest = orfs_of_gene_of_interest['Contig'].unique()\n",
"\n",
"# Bin of each of those contigs, joined with the bin's MAG and taxonomy.\n",
"contigs.loc[contigs_from_gene_of_interest].join(bins, on='Bin')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "economic-andorra",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment