Skip to content

Instantly share code, notes, and snippets.

@ejmurray
Created June 29, 2015 08:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ejmurray/0d5529175bc3b14e87b3 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import urllib
import xml.etree.ElementTree as ET
import cgi
import pyperclip
import sys
# TODO: read the xml file of the article in question to figure out where to add the doi.
def escapeToHTML(text, escapeQuotes=False):
    """Escape *text* for embedding in HTML.

    Replaces the HTML special characters &, < and > (and, when
    *escapeQuotes* is true, the double quote) with their named entities,
    then rewrites every non-ASCII character as a numeric character
    reference such as &#947;.

    Bug fix: the original encoded with the 'utf-8' codec, which can
    represent every character, so the 'xmlcharrefreplace' error handler
    never fired and non-ASCII text came back as raw UTF-8 bytes instead
    of entities.  Encoding to 'ascii' makes the handler do its job.
    (The & replacement must run first so later entities aren't doubled.)
    """
    escaped = text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
    if escapeQuotes:
        escaped = escaped.replace('"', "&quot;")
    # Turn every non-ASCII character into &#NNN; and return text, not bytes.
    return escaped.encode("ascii", "xmlcharrefreplace").decode("ascii")
def checkXML(XML, path):
    """Return the text of the first element matching *path* under *XML*.

    Yields an empty string when the element is absent or carries no text,
    so callers can concatenate the result without None checks.
    """
    node = XML.find(path)
    if node is None:
        return ""
    return node.text if node.text is not None else ""
def digest_authors(authors):
    """Format each <Author> element as "Initials LastName" and return the list."""
    return [
        "%s %s" % (checkXML(author, "./Initials"), checkXML(author, "./LastName"))
        for author in authors
    ]
def digest_issue(issue):
    """Format a <JournalIssue> element as "Volume(Issue)".

    Returns just the volume when no issue number is present, and an
    empty string when neither exists.

    Bug fix: the original read ``issue.find("./Issue").text`` directly,
    which raises TypeError during string concatenation whenever the
    <Issue> element exists but is empty (text is None).
    """
    result = ""
    volume = issue.find("./Volume")
    if volume is not None and volume.text is not None:
        result += volume.text
    number = issue.find("./Issue")
    # Only add the parenthesised issue number when there is real text.
    if number is not None and number.text:
        result += "(" + number.text + ")"
    return result
def digest_year(XML):
    """Extract the publication year from a <MedlineCitation> element.

    Prefers the explicit <Year>; falls back to the first four characters
    of <MedlineDate> (e.g. "1999 Jan-Feb" -> "1999"); defaults to "1900"
    when neither is present.
    """
    year_node = XML.find("./Article/Journal/JournalIssue/PubDate/Year")
    if year_node is not None:
        return year_node.text
    medline_node = XML.find("./Article/Journal/JournalIssue/PubDate/MedlineDate")
    if medline_node is not None:
        return medline_node.text[0:4]
    return "1900"
def search_pubmed(term):
    """Search PubMed for *term* and return the raw efetch XML (bytes).

    Uses the NCBI E-utilities two-step history flow: esearch stores the
    hit list server-side, then efetch retrieves the full records via the
    returned query_key/WebEnv pair.  At most ``retmax`` (20) hits.

    Bug fix: the endpoints now use https — NCBI disabled plain-http
    access to the E-utilities, so the old http:// URLs fail.
    (Py2 urllib API kept to match the rest of this script.)
    """
    params = {
        'db': 'pubmed',
        'tool': 'test',
        'email': 'test@test.com',
        'term': term,
        'usehistory': 'y',
        'retmax': 20,
    }
    url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?' + urllib.urlencode(params)
    tree = ET.fromstring(urllib.urlopen(url).read())
    # Reuse the server-side result set for the fetch step.
    params['query_key'] = tree.find("./QueryKey").text
    params['WebEnv'] = tree.find("./WebEnv").text
    params['retmode'] = 'xml'
    url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?' + urllib.urlencode(params)
    return urllib.urlopen(url).read()
def xml_to_papers(data):
    """Parse efetch XML into a list of paper dicts.

    Each dict carries: journal_name, title, authors (list of
    "Initials LastName"), issue ("Volume(Issue)"), year, page_num,
    pmid and doi.  Optional fields default to "" via checkXML.

    Bug fix (the TODO at the top of this file): the DOI was looked up at
    "./ELocationID", but PubMed nests ELocationID under <Article>, and
    an article may also carry a "pii" ELocationID — so the old path
    matched nothing.  Filter on EIdType="doi" at the correct depth.
    """
    tree = ET.fromstring(data)
    articles = tree.findall("./PubmedArticle/MedlineCitation")
    papers = []
    for article in articles:
        paper = dict()
        # Use checkXML for fields that may be absent/empty; keep direct
        # .text access only where PubMed always supplies the element.
        paper["journal_name"] = checkXML(article, "./Article/Journal/ISOAbbreviation")
        paper["title"] = article.find("./Article/ArticleTitle").text
        paper["authors"] = digest_authors(article.findall("./Article/AuthorList/Author"))
        paper["issue"] = digest_issue(article.find("./Article/Journal/JournalIssue"))
        paper["year"] = digest_year(article)
        paper["page_num"] = checkXML(article, "./Article/Pagination/MedlinePgn")
        paper["pmid"] = article.find("./PMID").text
        paper["doi"] = checkXML(article, "./Article/ELocationID[@EIdType='doi']")
        papers.append(paper)
    return papers
def printCV():
    """Resolve the search title and return the matching PubMed papers.

    The title comes from the command-line arguments when given,
    otherwise from the clipboard.

    Bug fix: the argv branch assigned the joined arguments to a variable
    named ``address`` that was never read, so running the script with
    arguments crashed with NameError on ``title``.  Both branches now
    assign ``title``.
    """
    if len(sys.argv) > 1:
        # Search term supplied on the command line.
        title = ' '.join(sys.argv[1:])
    else:
        # Fall back to whatever is on the clipboard.
        title = pyperclip.paste()
    return xml_to_papers(search_pubmed(title))
cache = []
papers = printCV()
for paper in papers:
authorlist = ""
num_authors = len(paper["authors"])
for a in range(num_authors):
authorlist += escapeToHTML(paper["authors"][a])
if a < num_authors-1:
authorlist += ", "
else:
authorlist += "."
print authorlist
print (paper["title"])
print "http://www.ncbi.nlm.nih.gov/pubmed/" + paper["pmid"]
if paper["journal_name"][-1] == ".":
j_title = paper["journal_name"][0:-1]
else:
j_title = paper["journal_name"]
if len(paper["doi"]) > 0:
print j_title + " " + paper["year"]+" " + paper["issue"] + ":" + paper["page_num"] + "doi: <a href='http://doi.org/" + paper["doi"] + "'>" + paper["doi"] + " </p>"
else:
print j_title + ". " + paper["year"]+", " + paper["issue"] + ":" + paper["page_num"] + paper["doi"] + "."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment