-
-
Save bonzanini/5a4c39e4c02502a8451d to your computer and use it in GitHub Desktop.
# This code uses Biopython to retrieve lists of articles from PubMed.
# You need to install Biopython first.
# If you use Anaconda:
# conda install biopython
# If you use pip/venv:
# pip install biopython
# Full discussion:
# https://marcobonzanini.wordpress.com/2015/01/12/searching-pubmed-with-python/
from Bio import Entrez | |
def search(query, retmax=20):
    """Search PubMed for *query* and return the parsed ESearch response.

    Args:
        query: Search term, passed to ``Entrez.esearch`` as ``term``.
        retmax: Maximum number of ids to request. Defaults to 20, the
            value that was previously hard-coded.

    Returns:
        The object produced by ``Entrez.read`` for the response; the
        script section of this file reads its ``'IdList'`` entry.
    """
    Entrez.email = 'your.email@example.com'
    handle = Entrez.esearch(db='pubmed',
                            sort='relevance',
                            retmax=str(retmax),
                            retmode='xml',
                            term=query)
    try:
        return Entrez.read(handle)
    finally:
        # Release the response handle even when parsing raises.
        handle.close()
def fetch_details(id_list):
    """Fetch the PubMed records for the given ids.

    Args:
        id_list: Sequence of PubMed ids (strings or ints).

    Returns:
        The object produced by ``Entrez.read`` for the EFetch response;
        the script section of this file reads its ``'PubmedArticle'``
        entry. For an empty ``id_list`` no request is made and
        ``{'PubmedArticle': []}`` is returned instead.
    """
    if not id_list:
        # Nothing to fetch: skip the network round trip and hand back an
        # empty result with the key callers iterate over.
        return {'PubmedArticle': []}
    # str() lets callers pass ids as ints as well as strings.
    ids = ','.join(str(pmid) for pmid in id_list)
    Entrez.email = 'your.email@example.com'
    handle = Entrez.efetch(db='pubmed',
                           retmode='xml',
                           id=ids)
    try:
        return Entrez.read(handle)
    finally:
        # Release the response handle even when parsing raises.
        handle.close()
if __name__ == '__main__':
    # Demo: search PubMed for 'fever', fetch the matching records and
    # print a numbered list of their titles.
    results = search('fever')
    id_list = results['IdList']
    papers = fetch_details(id_list)
    # start=1 gives human-friendly numbering without the manual i+1.
    for i, paper in enumerate(papers['PubmedArticle'], start=1):
        print("{}) {}".format(i, paper['MedlineCitation']['Article']['ArticleTitle']))
This is awesome, thanks, and works pretty well. I'm stuck on trying to get the details of the authors in a succinct way; can anybody help with how to do that? paper['MedlineCitation']['Article']['AuthorList'] isn't right....
Thanks in advance!
Thanks this is very helpful. Somewhat similar to @echorule, I'm having trouble obtaining the full abstract of a paper as one string. What I get looks like this:
StringElement(' ... ', attributes={'Label': 'BACKGROUND', 'NlmCategory': 'BACKGROUND'}), StringElement(' ... ', attributes={'Label': 'METHODS', 'NlmCategory': 'METHODS'}), StringElement('...' ...)
Is there a more reasonable way to go about this?
Thanks this is very helpful. Somewhat similar to @echorule, I'm having trouble obtaining the full abstract of a paper as one string. What I get looks like this:
StringElement(' ... ', attributes={'Label': 'BACKGROUND', 'NlmCategory': 'BACKGROUND'}), StringElement(' ... ', attributes={'Label': 'METHODS', 'NlmCategory': 'METHODS'}), StringElement('...' ...)
Is there a more reasonable way to go about this?
@rsgoncalves did you manage to resolve this? You can fetch the abstract using: paper['MedlineCitation']['Article']['Abstract']['AbstractText']
So your full code for retrieving the authors, title and abstract could look something like:
from Bio import Entrez
def search(query, retmax=20):
    """Run a relevance-sorted PubMed search and return the parsed result.

    Args:
        query: Search term, passed to ``Entrez.esearch`` as ``term``.
        retmax: Maximum number of ids to request. Defaults to 20, the
            value that was previously hard-coded.

    Returns:
        The object produced by ``Entrez.read`` for the response; the
        script section of this file reads its ``'IdList'`` entry.
    """
    Entrez.email = 'your.email@example.com'
    handle = Entrez.esearch(db='pubmed',
                            sort='relevance',
                            retmax=str(retmax),
                            retmode='xml',
                            term=query)
    try:
        return Entrez.read(handle)
    finally:
        # Release the response handle even when parsing raises.
        handle.close()
def fetch_details(id_list):
    """Fetch the PubMed records for the given ids.

    Args:
        id_list: Sequence of PubMed ids (strings or ints).

    Returns:
        The object produced by ``Entrez.read`` for the EFetch response;
        the script section of this file reads its ``'PubmedArticle'``
        entry. For an empty ``id_list`` no request is made and
        ``{'PubmedArticle': []}`` is returned instead.
    """
    if not id_list:
        # Nothing to fetch: skip the network round trip and hand back an
        # empty result with the key callers iterate over.
        return {'PubmedArticle': []}
    # str() lets callers pass ids as ints as well as strings.
    ids = ','.join(str(pmid) for pmid in id_list)
    Entrez.email = 'your.email@example.com'
    handle = Entrez.efetch(db='pubmed',
                           retmode='xml',
                           id=ids)
    try:
        return Entrez.read(handle)
    finally:
        # Release the response handle even when parsing raises.
        handle.close()
def get_abstract(paper):
    """Return the abstract of *paper* as a single value.

    Args:
        paper: A record with a ``['MedlineCitation']['Article']`` mapping,
            as found in the ``'PubmedArticle'`` entries iterated by the
            script section of this file.

    Returns:
        ``''`` when the article has no ``'Abstract'`` (or the abstract has
        no ``'AbstractText'``). When ``'AbstractText'`` is a list or tuple
        of parts, the parts joined with single spaces. Any other value is
        returned unchanged.

    Raises:
        KeyError: If ``'MedlineCitation'`` or ``'Article'`` is missing.
    """
    article = paper['MedlineCitation']['Article']
    if 'Abstract' not in article:
        return ''
    # Tolerate an Abstract element that carries no AbstractText at all.
    abstract = article['Abstract'].get('AbstractText', '')
    if isinstance(abstract, (list, tuple)):
        # Sectioned abstracts arrive as several parts; str() keeps the
        # join safe if a part is not already a plain string.
        return ' '.join(str(part) for part in abstract)
    return abstract
if __name__ == '__main__':
    # Demo: search PubMed for 'fever', fetch the matching records and
    # print title, authors and abstract for each one.
    results = search('fever')
    id_list = results['IdList']
    papers = fetch_details(id_list)
    for i, paper in enumerate(papers['PubmedArticle'], start=1):
        article = paper['MedlineCitation']['Article']
        title = article['ArticleTitle']
        # NOTE(review): per the PubMed XML schema AuthorList appears to be
        # optional, so a missing list is treated as "no authors" instead
        # of raising KeyError.
        author_list = article.get('AuthorList', [])
        # Fall back to CollectiveName for group authors and drop entries
        # with neither field, so the output has no empty ", ," gaps.
        names = (
            author.get('LastName') or author.get('CollectiveName', '')
            for author in author_list
        )
        authors = ', '.join(name for name in names if name)
        abstract = get_abstract(paper)
        print("{}) Title: {}".format(i, title))
        print(" Authors: {}".format(authors))
        print(" Abstract: {}".format(abstract))
        print()
@jajkelle Updated (better late than never), thank you all for pointing it out