Skip to content

Instantly share code, notes, and snippets.

@reiaoki
Last active October 4, 2023 12:40
Show Gist options
  • Save reiaoki/397e1ae0b841bd77a4bec824a58eebd2 to your computer and use it in GitHub Desktop.
Save reiaoki/397e1ae0b841bd77a4bec824a58eebd2 to your computer and use it in GitHub Desktop.
Search PubMed using Bio.Entrez, write results to a CSV file
#search PubMed with a keyword, write specific information to a CSV file using the NCBI E-utilities
from Bio import Entrez
import csv
def search(query):
    """Search PubMed for *query* and return the hits as CSV-ready rows.

    Runs an ESearch for up to 500 PubMed IDs (relevance-sorted, with
    mindate=2013 and maxdate=2017), fetches the matching records with EFetch
    and extracts a few fields from each one.

    Args:
        query: Search term passed to ESearch as ``term``.

    Returns:
        A list of rows. The first row is the header
        ``[' ', 'Title', 'Abstract', 'Address', 'Name', 'Year', 'Country']``;
        each following row is ``[running number, title, first abstract
        section, affiliation of the last author, first author's name,
        publication year, country]``. Articles that lack any of these fields
        are skipped. If the search finds nothing, only the header is returned.
    """
    # provide NCBI your email address
    Entrez.email = 'youremail@email.com'

    header = [' ', 'Title', 'Abstract', 'Address', 'Name', 'Year', 'Country']
    results = [header]

    # get the list of IDs
    searchHandle = Entrez.esearch(db='pubmed',
                                  sort='relevance',
                                  retmax='500',
                                  retmode='xml',
                                  mindate=2013,
                                  maxdate=2017,
                                  term=query)
    try:
        searchResults = Entrez.read(searchHandle)
    finally:
        searchHandle.close()

    pubmedIds = searchResults['IdList']
    print(pubmedIds)
    if not pubmedIds:
        # Nothing matched: do not send an EFetch request with an empty id.
        return results

    # get specific information regarding the articles
    fetchHandle = Entrez.efetch(db='pubmed',
                                retmode='xml',
                                id=','.join(pubmedIds))
    try:
        fetchResults = Entrez.read(fetchHandle)
    finally:
        fetchHandle.close()

    counter = 0
    for paper in fetchResults['PubmedArticle']:
        article = paper['MedlineCitation']['Article']
        # some articles lack certain information; skip those and keep the
        # loop going
        try:
            articleTitle = article['ArticleTitle']
            # only the first abstract section is kept
            abstract = article['Abstract']['AbstractText'][0]
            articleYear = article['Journal']['JournalIssue']['PubDate']['Year']
            firstAuthor = article['AuthorList'][0]
            formatedName = firstAuthor['ForeName'] + ' ' + firstAuthor['LastName']
            # the affiliation is taken from the LAST author in the list
            affiliation = article['AuthorList'][-1]['AffiliationInfo'][0]['Affiliation']
        except (KeyError, IndexError):
            continue

        # Heuristic: treat the text after the last comma as the country and
        # drop a trailing period. Affiliations that end with something else
        # (e.g. an e-mail address) will yield that text instead.
        country = affiliation.rsplit(',', 1)[-1].strip().rstrip('.')

        counter += 1
        results.append([counter, articleTitle, abstract, affiliation,
                        formatedName, articleYear, country])
    return results
#write the results to a CSV file
def _csv_writer(filename, query):
    """Search PubMed for *query* and write the returned rows to *filename*.

    Args:
        filename: Path of the CSV file to create (overwritten if it exists).
        query: Search term handed to ``search``.
    """
    # Run the search before opening the file so that a failed search does
    # not leave behind an empty, truncated output file.
    rows = search(query)
    # newline='' lets the csv module control line endings; the ``with``
    # block closes the file, so no explicit close() is needed.
    with open(filename, 'w', newline='', encoding='UTF-8') as file:
        csv.writer(file, dialect='excel').writerows(rows)
#export the PubMed hits for the example query to a CSV file
_csv_writer(filename='takedaTest.csv', query='adoptive cell therapy')
#search('copd')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment