Last active
October 4, 2023 12:40
-
-
Save reiaoki/397e1ae0b841bd77a4bec824a58eebd2 to your computer and use it in GitHub Desktop.
Search PubMed using Bio.Entrez and write the results to a CSV file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Search PubMed with a keyword and write selected article information to a CSV file using the NCBI E-utilities
from Bio import Entrez | |
import csv | |
def _country_from_affiliation(affiliation):
    """Return the text after the last comma of *affiliation*.

    Surrounding whitespace and a trailing period are removed. When the
    affiliation contains no comma the whole (stripped) string is returned.
    """
    _, _, tail = affiliation.rpartition(',')
    return tail.strip().rstrip('.')


def search(query):
    """Search PubMed for *query* and return the hits as CSV-ready rows.

    The search is sorted by relevance, limited to 500 IDs and to the
    2013-2017 date range. The matching IDs are printed, then fetched in a
    single EFetch request.

    Args:
        query: The PubMed search term.

    Returns:
        A list of rows. The first row is the header
        ``[' ', 'Title', 'Abstract', 'Address', 'Name', 'Year', 'Country']``;
        each following row holds a 1-based running number followed by the
        values for one article. Articles lacking any of the required fields
        are skipped.
    """
    # Provide NCBI with your email address.
    Entrez.email = 'youremail@email.com'

    # Get the list of matching IDs.
    search_handle = Entrez.esearch(db='pubmed',
                                   sort='relevance',
                                   retmax='500',
                                   retmode='xml',
                                   mindate=2013,
                                   maxdate=2017,
                                   term=query)
    try:
        search_results = Entrez.read(search_handle)
    finally:
        search_handle.close()
    id_list = search_results['IdList']
    print(id_list)

    header = [' ', 'Title', 'Abstract', 'Address', 'Name', 'Year', 'Country']
    results = [header]
    if not id_list:
        # Nothing matched: do not send an EFetch request with an empty ID.
        return results

    # Get the detailed records for every ID in one request.
    fetch_handle = Entrez.efetch(db='pubmed',
                                 retmode='xml',
                                 id=','.join(id_list))
    try:
        fetch_results = Entrez.read(fetch_handle)
    finally:
        fetch_handle.close()

    counter = 0
    for paper in fetch_results['PubmedArticle']:
        article = paper['MedlineCitation']['Article']
        # Some articles lack certain information; skip those so the loop
        # keeps going and no partially filled row is emitted.
        try:
            article_title = article['ArticleTitle']
            abstract = article['Abstract']['AbstractText'][0]
            article_year = article['Journal']['JournalIssue']['PubDate']['Year']
            first_author = article['AuthorList'][0]
            formatted_name = first_author['ForeName'] + ' ' + first_author['LastName']
            # NOTE(review): the name comes from the first author but the
            # address from the last author -- confirm this is intended.
            affiliation = article['AuthorList'][-1]['AffiliationInfo'][0]['Affiliation']
        except (KeyError, IndexError):
            continue
        counter += 1
        results.append([counter,
                        article_title,
                        abstract,
                        affiliation,
                        formatted_name,
                        article_year,
                        _country_from_affiliation(affiliation)])
    return results
def _csv_writer(filename, query):
    """Run ``search(query)`` and write the returned rows to a CSV file.

    Args:
        filename: Path of the CSV file to create; an existing file is
            overwritten.
        query: Search term passed unchanged to ``search``.
    """
    # Run the search first so that a failing search does not leave behind a
    # freshly truncated, empty file.
    rows = search(query)
    # newline='' lets the csv module control line endings itself; the
    # ``with`` block closes the file, so no explicit close() is needed.
    with open(filename, 'w', newline='', encoding='UTF-8') as file:
        csv.writer(file, dialect='excel').writerows(rows)
# Script body: search for 'adoptive cell therapy' and store the rows in
# takedaTest.csv.
_csv_writer(filename='takedaTest.csv', query='adoptive cell therapy')
# Alternative manual check that only runs the search:
# search('copd')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment