Skip to content

Instantly share code, notes, and snippets.

@beneon
Last active July 29, 2019 02:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save beneon/cd2bb92721c2c2c3fdb95f82d1df3b29 to your computer and use it in GitHub Desktop.
Save beneon/cd2bb92721c2c2c3fdb95f82d1df3b29 to your computer and use it in GitHub Desktop.
ben's code snippet
import requests
import os
import xml.etree.ElementTree as ET
import pyperclip
import argparse
import re
# Pattern for PubMed article URLs; group 1 captures the numeric PMID.
# Dots are escaped so they only match literal dots, and both the legacy
# "www.ncbi.nlm.nih.gov/pubmed/<id>" form and the current
# "pubmed.ncbi.nlm.nih.gov/<id>" form are accepted.
reLink = re.compile(
    r'https?://(?:www\.ncbi\.nlm\.nih\.gov/pubmed|pubmed\.ncbi\.nlm\.nih\.gov)/(\d+)')


def _pmid_from_link(text):
    """Return the PMID found in the first PubMed URL inside *text*.

    Raises:
        ValueError: if *text* is empty or contains no PubMed URL.  The match
            object is checked before ``group`` is called, so a bad link gives
            this message instead of an AttributeError on ``None``.
    """
    match = reLink.search(text or "")
    if match is None:
        raise ValueError("link format is wrong")
    return match.group(1)


# NOTE: the string is passed positionally, so argparse treats it as the
# program name shown in usage output; it is kept as-is to leave that output
# unchanged.
parser = argparse.ArgumentParser(
    "-i pmid -l pubmed link")
parser.add_argument('-i', help="PubMed ID to look up")
parser.add_argument('-l', help="PubMed article link to extract the ID from")
# -p and -verbose are used as switches (only "is not None" is tested), so
# they may now be given bare; passing a value still works as before.
parser.add_argument('-p', nargs='?', const=True,
                    help="read the PubMed link from the clipboard")
parser.add_argument('-verbose', nargs='?', const=True,
                    help="append the abstract to the output")
parser.add_argument('-paste')
args = parser.parse_args()

# Resolve the PMID.  Precedence: explicit id, clipboard link, link argument,
# then a built-in sample id.
if args.i is not None:
    print(args.i)
    ids = args.i
elif args.p is not None:
    ids = _pmid_from_link(pyperclip.paste())
elif args.l is not None:
    print(args.l)
    ids = _pmid_from_link(args.l)
else:
    ids = '30693853'
dbname = 'pubmed'
def _node_text(parent, path, default=""):
    """Return the stripped text of the first node matching *path* under *parent*.

    Text nested inside inline child elements is included.  *default* is
    returned when *parent* is None or no node matches.
    """
    node = parent.find(path) if parent is not None else None
    if node is None:
        return default
    return "".join(node.itertext()).strip()


def xmlParse(strtxt):
    """Parse an efetch PubMed XML document into a plain dictionary.

    Only the first ``PubmedArticle`` element is read.

    Args:
        strtxt: the XML document as a string.

    Returns:
        A dict with the keys:
          * ``pmid``     - PMID text.
          * ``journal``  - dict with ``volume``, ``issue``, ``pubyear``,
            ``title`` and ``abbrev``.  ``pubyear`` is ``PubDate/Year`` and
            falls back to the ``PubDate/MedlineDate`` text.
          * ``title``    - article title.
          * ``doi``      - text of the ArticleId whose IdType is "doi".
          * ``abstract`` - dict mapping each AbstractText ``Label`` attribute
            (``None`` when the attribute is absent) to its text; sections
            sharing a label are joined with a newline.
          * ``authors``  - list of "LastName, ForeName(Initials)" strings, or
            the ``CollectiveName`` for authors without a ``LastName``.
        Any optional element that is missing yields an empty string, dict or
        list rather than an exception.

    Raises:
        ValueError: if the document has no ``PubmedArticle/MedlineCitation``.
        xml.etree.ElementTree.ParseError: if *strtxt* is not well-formed XML.
    """
    root = ET.fromstring(strtxt)
    record = root.find('./PubmedArticle')
    citation = record.find('MedlineCitation') if record is not None else None
    if citation is None:
        raise ValueError("no PubmedArticle/MedlineCitation element found in XML")

    journal = citation.find('./Article/Journal')
    # PubDate is a container element, so its own .text is never the year.
    pubDate = citation.find('./Article/Journal/JournalIssue/PubDate')

    rstDict = {}
    rstDict['pmid'] = _node_text(citation, 'PMID')
    rstDict['journal'] = {
        'volume': _node_text(journal, './JournalIssue/Volume'),
        'issue': _node_text(journal, './JournalIssue/Issue'),
        'pubyear': _node_text(pubDate, 'Year') or _node_text(pubDate, 'MedlineDate'),
        'title': _node_text(journal, './Title'),
        'abbrev': _node_text(journal, './ISOAbbreviation'),
    }
    rstDict['title'] = _node_text(citation, './Article/ArticleTitle')
    rstDict['doi'] = _node_text(
        record, "./PubmedData/ArticleIdList/ArticleId[@IdType='doi']")

    abstract = {}
    for section in citation.findall('./Article/Abstract/AbstractText'):
        label = section.get('Label')
        text = "".join(section.itertext()).strip()
        if label in abstract:
            # Keep every section instead of letting a repeated label overwrite.
            abstract[label] = abstract[label] + "\n" + text
        else:
            abstract[label] = text
    rstDict['abstract'] = abstract

    authors = []
    for author in citation.findall('./Article/AuthorList/Author'):
        lastName = _node_text(author, 'LastName')
        if lastName:
            authors.append("{}, {}({})".format(
                lastName,
                _node_text(author, 'ForeName'),
                _node_text(author, 'Initials')))
            continue
        # Group authors carry CollectiveName instead of personal name parts.
        collective = _node_text(author, 'CollectiveName')
        if collective:
            authors.append(collective)
    rstDict['authors'] = authors
    return rstDict
# Fetch the record from NCBI efetch.  The query is passed via ``params`` so
# requests builds and encodes it; the earlier triple-quoted f-string URL ended
# with a newline that became part of the request.  The timeout keeps the
# script from hanging indefinitely on a stalled connection.
fetchedData = requests.get(
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
    params={
        'db': dbname,
        'id': ids,
        'rettype': 'abstract',
        'retmode': 'XML',
    },
    timeout=30,
)
# Fail here on an HTTP error instead of trying to parse an error page as XML.
fetchedData.raise_for_status()
# Leftover code: now that debugging is finished the XML is no longer read back
# from this local file, but the raw response is still written out.
with open('out.xml', 'w', encoding='utf8') as xmlFile:
    xmlFile.write(fetchedData.text)
rstDict = xmlParse(fetchedData.text)

# Build the author line from the first author; "et al." is only added when
# there are further authors, and an empty author list no longer raises.
authorNames = rstDict['authors']
if not authorNames:
    authorLine = ""
elif len(authorNames) == 1:
    authorLine = authorNames[0]
else:
    authorLine = "{}, et al.".format(authorNames[0])

txt = """
title: {}
journal: {}, {}-{}, {}
author: {}
doi: {}
""".format(
    rstDict['title'],
    rstDict['journal']['title'],
    rstDict['journal']['volume'],
    rstDict['journal']['issue'],
    rstDict['journal']['pubyear'],
    authorLine,
    rstDict['doi']
)

# "abstract:" is a header followed by the sections.  It used to be the join
# separator, so it was missing entirely for a single-section abstract.
# Sections whose label is None are printed without a label line.
abstractSections = [
    "{}:\n{}".format(label, body) if label is not None else str(body)
    for label, body in rstDict['abstract'].items()
]
abstracts = "abstract:\n" + "\n".join(abstractSections) if abstractSections else ""
if args.verbose is not None:
    txt = txt+abstracts
pyperclip.copy(txt)
print(txt)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment