Skip to content

Instantly share code, notes, and snippets.

@matteoferla
Created April 11, 2017 19:55
Show Gist options
  • Save matteoferla/660de8a3fb8559ca664c4211c24e490a to your computer and use it in GitHub Desktop.
Save matteoferla/660de8a3fb8559ca664c4211c24e490a to your computer and use it in GitHub Desktop.
def zoospotter(zoo,fn='pubmed_scored.csv', assoclist=['infection']):
    """
    Count the PubMed entries associated with each species in ``zoo`` and save the counts as CSV.

    Each species name is passed through ``genomeArk.binomialiser``; when that yields a result,
    PubMed is searched once for the bare "genus species" name and once more per term in
    ``assoclist`` (the name and the term are joined with ``AND``). One row per species is
    written to ``fn`` with the columns ``name``, ``PM_plain`` and ``PM_<term>`` for every term.

    :param zoo: iterable of species names.
    :param fn: file name to save; opened in write mode, so an existing file is overwritten.
    :param assoclist: terms whose association with the species is sought,
        e.g. 'infection' to find pathogenicity. It is only read, never modified.
    :return: None.
    """
    # Debug output is disabled by default; rebind to ``print`` to switch it on.
    debugprint = lambda *x: None
    debugprint('MODE: Pubmed association')

    def get_count(terms):
        """Return the "Count" field of a PubMed search for all ``terms`` AND-ed together."""
        handle = Entrez.esearch(db="pubmed", term=' AND '.join(terms))
        try:
            record = Entrez.read(handle)
        finally:
            # Release the search handle even if parsing the response fails.
            handle.close()
        return record["Count"]

    Entrez.email = EMAIL
    # Copy once so any iterable of terms works, not only a list.
    terms = list(assoclist)
    fieldnames = ['name'] + ['PM_' + s for s in ['plain'] + terms]
    # The context manager guarantees the rows are flushed and the file is closed,
    # including when a query raises part-way through the species list.
    with open(fn, 'w', newline='') as csvfile:
        w = csv.DictWriter(csvfile, fieldnames=fieldnames)
        w.writeheader()
        for species in zoo:
            data = {'name': species}
            bn = genomeArk.binomialiser(species)
            if bn:
                sp = '{genus} {species}'.format(**bn)
                n = get_count([sp])
                print(species, '>>> ', sp, ' — ','x', ': ', n)
                data['PM_plain'] = n
                for assoc in terms:
                    n = get_count([sp, assoc])
                    print(species,'>>> ',sp,' — ',assoc,': ',n)
                    data['PM_' + assoc] = n
            # NOTE(review): the row is assumed to be written for every species, so a name
            # that binomialiser cannot resolve gets a row with only ``name`` filled in
            # (DictWriter leaves the missing columns empty) -- confirm this is intended.
            debugprint(data)
            w.writerow(data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment