Skip to content

Instantly share code, notes, and snippets.

@AlexMikhalev
Created July 26, 2022 14:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AlexMikhalev/c35657e88fce7962e96c3d24a70e0032 to your computer and use it in GitHub Desktop.
fetch_pubmed_notes.py
import json
from urllib.parse import urlencode

import requests
# --- Entrez E-utilities settings -------------------------------------------
# Database to search ('pmc' = PubMed Central full-text records).
db = 'pmc'
# Base URL shared by every E-utilities endpoint.  NCBI documents the
# eutils.ncbi.nlm.nih.gov host as the base URL, so esearch and efetch both
# use it instead of mixing two different hosts.
domain = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils'
# Maximum number of IDs requested from esearch.
nresults = 4
# Search term sent to esearch.
query = "depression"
# Response format requested from esearch.
retmode = 'json'
# Seconds to wait for NCBI; requests applies no timeout unless one is given.
REQUEST_TIMEOUT = 30
# Accumulator referenced by the commented-out collection snippets that follow
# this script; nothing in the active code fills it.
results = []


def build_search_url(term=query, database=db, retmax=nresults, mode=retmode):
    """Return the esearch URL that looks up *term* in *database*.

    Args:
        term: Search expression; it is percent-encoded, so spaces and
            characters such as ``&`` cannot break the query string.
        database: Entrez database name.
        retmax: Maximum number of IDs to return.
        mode: Response format requested from esearch.

    Returns:
        The full esearch URL as a string.
    """
    params = urlencode(
        {'db': database, 'retmax': retmax, 'retmode': mode, 'term': term}
    )
    return f'{domain}/esearch.fcgi?{params}'


def build_abstract_url(paper_id, database=db):
    """Return the efetch URL requesting the XML abstract of *paper_id*.

    Args:
        paper_id: Record ID as returned in the esearch ``idlist``.
        database: Entrez database name.

    Returns:
        The full efetch URL as a string.
    """
    params = urlencode(
        {'db': database, 'id': paper_id, 'retmode': 'xml', 'rettype': 'abstract'}
    )
    return f'{domain}/efetch.fcgi?{params}'


def main():
    """Search Entrez for ``query`` and print one efetch URL per hit.

    Raises:
        requests.HTTPError: If esearch answers with an HTTP error status.
        KeyError: If the JSON reply lacks ``esearchresult`` / ``idlist``.
    """
    # standard query
    search_url = build_search_url()
    response = requests.get(search_url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    search_json = response.json()
    for paper_id in search_json["esearchresult"]["idlist"]:
        # metadata query (disabled)
        # queryLinkSummary = f'{domain}/esummary.fcgi?db={db}&id={paper_id}&retmode={retmode}'
        # print(queryLinkSummary)
        fetch_abstract = build_abstract_url(paper_id)
        print(fetch_abstract)


if __name__ == "__main__":
    main()
# results.append({'paperId': paperId, 'metadata': requests.get(queryLinkSummary).json()})
# results.append({'paperId': paperId, 'abstract': requests.get(fetch_abstract).json()})
# for pubmed abstract
# my_dict=xmltodict.parse(response.content)
# my_dict['PubmedArticleSet']['PubmedArticle']['MedlineCitation']['Article']['Abstract']['AbstractText']
# print(each_line.keys()) dict_keys(['@Label', '@NlmCategory', '#text'])
# for each_line in my_dict['PubmedArticleSet']['PubmedArticle']['MedlineCitation']['Article']['Abstract']['AbstractText']:
# print(each_line['@Label'])
# print(each_line['#text'])
# PMC abstract
# my_pmc_dict['pmc-articleset']['article']['front']['article-meta']['abstract'] or (my_pmc_dict['pmc-articleset']['article']['front']['article-meta']['abstract']['sec'])
# my_pmc_dict['pmc-articleset']['article']['body']
# >>> my_pmc_dict['pmc-articleset']['article']['body']['sec'][0].keys()
# dict_keys(['@id', 'title', 'p', 'sec'])
# publication date
# my_pmc_dict['pmc-articleset']['article']['front']['article-meta']['pub-date'][0]
# my_pmc_dict['pmc-articleset']['article']['front']['article-meta']['pub-date'][0]['day']
# my_pmc_dict['pmc-articleset']['article']['front']['article-meta']['pub-date'][0]['month']
# my_pmc_dict['pmc-articleset']['article']['front']['article-meta']['pub-date'][0]['year']
# get all text paragraphs
# for each_paragraph in my_pmc_dict['pmc-articleset']['article']['body']['sec']:
# if
# each_paragraph['p'][0]['#text']
# resultsSorted = sorted(results, key=lambda x: x["metadata"]["result"][x["paperId"]]["fulljournalname"])
# with open('resultsSorted.json', 'w') as f:
# json.dump(results, f)
# handle = Entrez.esearch(db="pubmed", term="obesity", retmax=5)
# results = Entrez.read(handle)
# print(results["IdList"])
# def print_abstract(pmid):
#     handle = efetch(db='pubmed', id=pmid, retmode='text', rettype='abstract')
#     print(handle.read())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment