Skip to content

Instantly share code, notes, and snippets.

@chapmanb
Created December 3, 2010 21:57
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chapmanb/727625 to your computer and use it in GitHub Desktop.
Save chapmanb/727625 to your computer and use it in GitHub Desktop.
retrieve_gene.py
from Bio import Entrez
def fetch_gene_coordinates(search_term):
    """Find a gene in the Entrez "gene" database and return its coordinates.

    Searches the gene database for ``search_term``, takes the first ID
    returned, fetches that record as XML, and reads the interval stored in
    the first sequence of the record's first locus entry.

    Args:
        search_term: Entrez query string, e.g. a gene name plus a strain.

    Returns:
        A ``(gi_id, start, end, strand)`` tuple where ``gi_id`` is the GI
        identifier found in the interval, ``start`` and ``end`` are the
        interval bounds with one added to each, and ``strand`` is the
        ``value`` attribute of the interval's ``Na-strand`` element.

    Raises:
        IndexError: if the search returns no gene IDs.
        KeyError: if the fetched record lacks the expected locus fields.
    """
    search_handle = Entrez.esearch(db="gene", term=search_term)
    try:
        search_result = Entrez.read(search_handle)
    finally:
        # Release the connection even when parsing fails.
        search_handle.close()

    id_list = search_result["IdList"]
    if not id_list:
        # Same exception type an empty-list lookup would raise, but with
        # enough context to see which query came back empty.
        raise IndexError(
            "No Entrez gene records found for search term %r" % (search_term,)
        )
    gene_id = id_list[0]  # assuming best match works

    fetch_handle = Entrez.efetch(db="gene", id=gene_id, retmode="xml")
    try:
        gene_record = Entrez.read(fetch_handle)[0]
    finally:
        fetch_handle.close()

    gene_locus = gene_record["Entrezgene_locus"][0]
    region = gene_locus["Gene-commentary_seqs"][0]["Seq-loc_int"]["Seq-interval"]
    # NOTE(review): the +1 presumably shifts zero-based Seq-interval offsets
    # to the one-based positions that efetch's seq_start/seq_stop take --
    # confirm against the NCBI documentation.
    start = int(region["Seq-interval_from"]) + 1
    end = int(region["Seq-interval_to"]) + 1
    gi_id = region["Seq-interval_id"]["Seq-id"]["Seq-id_gi"]
    strand = region["Seq-interval_strand"]["Na-strand"].attributes["value"]
    return gi_id, start, end, strand
def get_fasta_seq(gi_id, start, end, strand):
    """Fetch a sub-sequence from the Entrez "nucleotide" database as FASTA.

    Args:
        gi_id: identifier of the nucleotide record to fetch from.
        start: first position of the region, passed as ``seq_start``.
        end: last position of the region, passed as ``seq_stop``.
        strand: strand name; ``"minus"`` (case-insensitive) selects the
            minus strand, any other value selects the plus strand.

    Returns:
        The raw FASTA text returned by the efetch call.
    """
    # efetch takes a numeric strand: 2 is sent for "minus", 1 otherwise.
    strand_code = 2 if strand.lower() == "minus" else 1
    handle = Entrez.efetch(db="nucleotide", rettype="fasta", id=gi_id,
                           seq_start=start, seq_stop=end, strand=strand_code)
    try:
        return handle.read()
    finally:
        # Close the connection once the response has been read (or on error).
        handle.close()
# Contact address sent with Entrez requests; replace the placeholder
# with your own before running.
Entrez.email = "yours@mail.com"
# Example query: look up the gene, then print its sequence as FASTA.
search_term = "fliC ct18"
gi_id, start, end, strand = fetch_gene_coordinates(search_term)
# Parenthesised single-argument form prints identically on Python 2 and 3.
print(get_fasta_seq(gi_id, start, end, strand))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment