Skip to content

Instantly share code, notes, and snippets.

@dwinter
Created September 3, 2011 04:24
Show Gist options
  • Save dwinter/1190555 to your computer and use it in GitHub Desktop.
Save dwinter/1190555 to your computer and use it in GitHub Desktop.
An online fungal foray
from Bio import Entrez

# Identify ourselves to NCBI so they can get in touch if our requests
# cause problems.
Entrez.email = 'your.name.here@someplace.com'

# Search the nucleotide database for records that mention both phrases.
search_s = '"ectomycorrhizal root tip" AND "New Zealand"'
handle = Entrez.esearch(db='nucleotide', term=search_s, retmax=100)
try:
    # Entrez.read parses the search result; 'IdList' holds the matching IDs.
    ids = Entrez.read(handle)['IdList']
finally:
    # Release the connection even if parsing the response fails.
    handle.close()

# In an interactive session, peek at the first few IDs:
ids[:5]
#gives us ['157086858', '157086857', '157086856', '157086855', '157086854']
from collections import defaultdict

from Bio import SeqIO

# Download the GenBank ('gb') records for every ID returned by the search.
# The IDs are joined with bare commas (no spaces), the comma-delimited form
# described in the E-utilities documentation.
rec_handle = Entrez.efetch(db='nuccore', rettype='gb', id=','.join(ids))
# SeqIO.parse returns an iterator: records are read from rec_handle as the
# loop below consumes them, so the handle must stay open until it finishes.
recs = SeqIO.parse(rec_handle, 'gb')

# host name -> list of taxonomy strings (lineage joined with ':'), one entry
# per record whose 'source' feature carries a 'host' qualifier.
host_dict = defaultdict(list)
try:
    for record in recs:
        for feature in record.features:
            if feature.type != 'source':
                continue
            try:
                host = feature.qualifiers['host'][0]
                taxonomy = ':'.join(record.annotations['taxonomy'])
            except KeyError:
                # No 'host' qualifier on this source feature (a missing
                # 'taxonomy' annotation would also end up here); these
                # records have to be checked by hand.
                print('No host for {0}!'.format(record.id))
            else:
                host_dict[host].append(taxonomy)
                # A record can only have one host, so there is no need to
                # look at its remaining features.
                break
finally:
    # Close the download handle once every record has been read (or on error).
    rec_handle.close()
from collections import Counter

# They're all silver beech! Count how often each distinct taxonomy string
# occurs for that host species.
# .get() is used instead of indexing so that a missing host does not
# silently add an empty entry to host_dict (it is a defaultdict).
frequencies = Counter(host_dict.get('Nothofagus menziesii', []))

# In an interactive session, show the counts; list() keeps the output a
# plain list on Python 3 as well, where .values() returns a view.
list(frequencies.values())
#gives us:
#[15, 5, 24, 6, 3, 28, 2]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment