Created
June 29, 2015 08:27
-
-
Save ejmurray/0d5529175bc3b14e87b3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import urllib | |
import xml.etree.ElementTree as ET | |
import cgi | |
import pyperclip | |
import sys | |
# TODO: read the xml file of the article in question to figure out where to add the doi. | |
def escapeToHTML(text, escapeQuotes=False):
    """Escape HTML special characters and encode non-ASCII characters
    as numeric character references (e.g. u'\u03b3' -> '&#947;').

    Args:
        text: the string to escape.
        escapeQuotes: when True, also escape quote characters.

    Returns:
        An ASCII-safe, HTML-escaped str.
    """
    # html.escape replaces cgi.escape, which was deprecated in 3.2 and
    # removed with the cgi module in Python 3.13.
    escaped = html.escape(text, quote=escapeQuotes)
    # BUG FIX: the original encoded to UTF-8, but UTF-8 can represent
    # every code point, so 'xmlcharrefreplace' never fired and no
    # entities were produced (and under Python 3 it returned bytes).
    # Encoding to ASCII actually yields the intended &#NNN; entities;
    # decode back so callers get str.
    return escaped.encode('ascii', 'xmlcharrefreplace').decode('ascii')
def checkXML(XML, path):
    """Return the text of the first element matching *path*, or "" when
    the element is absent or has no text content."""
    node = XML.find(path)
    if node is None:
        return ""
    return node.text if node.text is not None else ""
def digest_authors(authors):
    """Format each <Author> element as 'Initials LastName'."""
    return [
        checkXML(author, "./Initials") + " " + checkXML(author, "./LastName")
        for author in authors
    ]
def digest_issue(issue):
    """Return 'Volume(Issue)' from a <JournalIssue> element; the
    parenthesised issue number is omitted when no <Issue> child exists."""
    volume = checkXML(issue, "./Volume")
    number = issue.find("./Issue")
    if number is None:
        return volume
    return volume + "(" + number.text + ")"
def digest_year(XML):
    """Extract a 4-character publication year from a MedlineCitation.

    Prefers <PubDate>/<Year>; falls back to the first four characters
    of <MedlineDate>; returns "1900" when neither is present.
    """
    year = XML.find("./Article/Journal/JournalIssue/PubDate/Year")
    if year is not None:
        return year.text
    medline = XML.find("./Article/Journal/JournalIssue/PubDate/MedlineDate")
    if medline is not None:
        return medline.text[:4]
    return "1900"
def search_pubmed(term):
    """Query PubMed via NCBI E-utilities and return efetch XML (bytes)
    for up to 20 articles matching *term*.

    Uses the esearch history server (usehistory=y) so the follow-up
    efetch call can reference the stored result set via WebEnv and
    query_key instead of re-sending the ID list.
    """
    params = {
        'db': 'pubmed',
        'tool': 'test',
        'email': 'test@test.com',
        'term': term,
        'usehistory': 'y',
        'retmax': 20,
    }
    # NCBI E-utilities require HTTPS; plain http is no longer served.
    base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
    # Python 3 moved urlencode/urlopen into urllib.parse/urllib.request;
    # the flat Python 2 urllib API used originally no longer exists.
    search_url = base + 'esearch.fcgi?' + urllib.parse.urlencode(params)
    with urllib.request.urlopen(search_url) as resp:
        tree = ET.fromstring(resp.read())
    params['query_key'] = tree.find("./QueryKey").text
    params['WebEnv'] = tree.find("./WebEnv").text
    params['retmode'] = 'xml'
    fetch_url = base + 'efetch.fcgi?' + urllib.parse.urlencode(params)
    with urllib.request.urlopen(fetch_url) as resp:
        return resp.read()
def xml_to_papers(data):
    """Parse efetch XML into a list of paper dicts.

    Each dict has keys: journal_name, title, authors (list of
    'Initials LastName' strings), issue, year, page_num, pmid, doi.

    Args:
        data: the XML document returned by search_pubmed.

    Returns:
        A list of dicts, one per MedlineCitation record.
    """
    tree = ET.fromstring(data)
    papers = []
    for article in tree.findall("./PubmedArticle/MedlineCitation"):
        papers.append({
            # Use checkXML throughout so a record missing an element
            # yields "" instead of raising AttributeError on the .text
            # of None (the original mixed bare find(...).text with
            # checkXML for no apparent reason).
            "journal_name": checkXML(article, "./Article/Journal/ISOAbbreviation"),
            "title": checkXML(article, "./Article/ArticleTitle"),
            "authors": digest_authors(article.findall("./Article/AuthorList/Author")),
            "issue": digest_issue(article.find("./Article/Journal/JournalIssue")),
            "year": digest_year(article),
            "page_num": checkXML(article, "./Article/Pagination/MedlinePgn"),
            "pmid": checkXML(article, "./PMID"),
            # NOTE(review): ELocationID can carry a pii as well as a
            # doi — TODO confirm whether filtering on EIdType="doi"
            # is wanted here.
            "doi": checkXML(article, "./ELocationID"),
        })
    return papers
def printCV():
    """Collect paper dicts for a search term.

    The term comes from the command-line arguments when any are given,
    otherwise from the system clipboard.

    Returns:
        The list of paper dicts produced by xml_to_papers.
    """
    if len(sys.argv) > 1:
        # BUG FIX: the original stored the command-line term in a
        # variable named `address` but then searched with the undefined
        # name `title`, raising NameError on this path.  Use one name
        # for both sources.
        title = ' '.join(sys.argv[1:])
    else:
        title = pyperclip.paste()
    return xml_to_papers(search_pubmed(title))
# Print an HTML-flavoured citation for each paper matching the query.
cache = []  # kept for backward compatibility; not used below
papers = printCV()
for paper in papers:
    # Authors joined as "A One, B Two." — comma-separated, final period.
    authors = paper["authors"]
    authorlist = ", ".join(escapeToHTML(a) for a in authors)
    if authors:
        authorlist += "."
    # Python 3 print() calls replace the Python 2 print statements.
    print(authorlist)
    print(paper["title"])
    print("http://www.ncbi.nlm.nih.gov/pubmed/" + paper["pmid"])
    # Drop a single trailing period from the journal abbreviation;
    # endswith() also avoids the original's IndexError on an empty name.
    j_title = paper["journal_name"]
    if j_title.endswith("."):
        j_title = j_title[:-1]
    if len(paper["doi"]) > 0:
        # BUG FIX: close the <a> element (the original emitted a stray
        # "</p>" and never closed the anchor) and separate "doi:" from
        # the page numbers with a space.
        print(j_title + " " + paper["year"] + " " + paper["issue"] + ":"
              + paper["page_num"] + " doi: <a href='http://doi.org/"
              + paper["doi"] + "'>" + paper["doi"] + "</a>")
    else:
        # The original appended paper["doi"] here, but this branch only
        # runs when it is "" — omit the no-op concatenation.
        print(j_title + ". " + paper["year"] + ", " + paper["issue"] + ":"
              + paper["page_num"] + ".")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment