Created
August 11, 2020 17:27
-
-
Save meren/bb27156e827551c1994181036173b200 to your computer and use it in GitHub Desktop.
Get gene calls and their sequences from an anvi'o contigs database
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Get gene calls and their sequences from an anvi'o contigs database.
import argparse

import anvio
from anvio.dbops import ContigsSuperclass

# If you already have an `args` object with a `contigs_db` entry in its
# namespace, you don't need argparse or the `args` line below: pass that
# object directly to ContigsSuperclass instead.
args = argparse.Namespace(contigs_db="INFANT-GUT-TUTORIAL/SPLITAH/E_facealis/CONTIGS.db")

# get an instance of the contigs super:
contigs_db = ContigsSuperclass(args)

# learn every gene caller id in the database:
gene_caller_ids = list(contigs_db.genes_in_contigs_dict.keys())

# and you're done. The call returns the gene caller ids along with a dict
# that is indexed by gene caller id; `include_aa_sequences=True` asks for
# the amino acid sequences as well:
gene_caller_ids, gene_sequences_dict = contigs_db.get_sequences_for_gene_callers_ids(
    gene_caller_ids, include_aa_sequences=True
)

# how many genes?
print(f"num genes:\n{len(gene_caller_ids)}\n\n")

# first few gene caller ids:
print(f"first few gene caller ids:\n{gene_caller_ids[0:20]}\n\n")

# just to see the output data structure, let's print out a single example
# entry from the gene sequences dict :) The id is kept in one variable so
# the printed label can never disagree with the entry actually shown.
example_gene_caller_id = 4
print(f"example data for gene caller id {example_gene_caller_id}:\n")

# NOTE(review): anvio.P presumably pretty-prints the dict -- confirm.
anvio.P(gene_sequences_dict[example_gene_caller_id])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ python get_genes.py
num genes:
2754
first few gene caller ids:
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
example data for gene caller id 17:
{
"sequence": "ATGAAACATTCACAACTTGTGGCGATTATTAAGAGACTGGAAGCAATGATCGAAGCAGCAGATAATGAAGTACAAGTACGCCGCTTTGAACGTGAAGGCGTAGAGAAATGTATTGTAAGTTTTGATAAATCAACAGAAACATTTGAATTAACAGAATCTGATACGCACCAAAGCTATCAATTCGATAACATCGATATTGTAGCAATGGAAATTTACGACTTAATTCAATAA",
"contig": "Day17a_QCcontig1",
"start": 3552,
"stop": 3783,
"direction": "f",
"rev_compd": "False",
"length": 231,
"aa_sequence": "MKHSQLVAIIKRLEAMIEAADNEVQVRRFEREGVEKCIVSFDKSTETFELTESDTHQSYQFDNIDIVAMEIYDLIQ"
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment