Skip to content

Instantly share code, notes, and snippets.

@fabiobatalha
Last active December 20, 2015 06:49
Show Gist options
  • Save fabiobatalha/6089045 to your computer and use it in GitHub Desktop.
Save fabiobatalha/6089045 to your computer and use it in GitHub Desktop.
API to query Crossref for the DOIs that match the given metadata.
# encoding: utf-8
from xml.etree import ElementTree
from xml.etree.ElementTree import Element

try:  # Python 2
    import urllib2
    from urllib import urlencode
except ImportError:  # Python 3: same names, new module locations
    import urllib.request as urllib2
    from urllib.parse import urlencode
class FindDoi(object):
    """Build Crossref metadata queries and extract the DOI from the replies.

    Typical use is ``get_query_batch_xml`` -> ``query_to_crossref`` ->
    ``extract_doi_from_xml``. The result format chosen at construction time
    is sent to Crossref with the query and decides where the DOI is looked
    up in the reply.
    """

    # Result format -> ElementTree path of the element that holds the DOI.
    _DOI_PATHS = {
        'unixref': './/doi_data/doi',
        'unixsd': './/{http://www.crossref.org/qrschema/3.0}doi',
        'xsd_xml': './/{http://www.crossref.org/qrschema/2.0}doi',
    }

    # Values of these types are used as element text unchanged; anything
    # else (e.g. an int year) is converted with str() before serializing.
    _TEXT_TYPES = (bytes, type(u''))

    def __init__(self, format='unixref'):
        """Select the Crossref result format.

        :param format: one of ``'unixref'``, ``'unixsd'`` or ``'xsd_xml'``.
        :raises ValueError: if ``format`` is not one of the allowed values.
        """
        if format not in self._DOI_PATHS:
            raise ValueError('format %s not allowed' % format)
        self._format = format

    def extract_doi_from_xml(self, xml):
        """Return the DOI found in a Crossref reply, or ``None``.

        :param xml: reply document, in the format given to the constructor.
        :returns: text of the first matching DOI element, or ``None`` when
            the reply contains no such element.
        """
        node = ElementTree.fromstring(xml).find(self._DOI_PATHS[self._format])
        # Compare with None explicitly: an Element without children is falsy.
        if node is None:
            return None
        return node.text

    def get_query_batch_xml(self,
                            key='any',
                            email_address="crossref@crossref.org",
                            doi_batch_id="crossref",
                            issn=None,
                            journal_title=None,
                            article_title=None,
                            author=None,
                            year=None,
                            volume=None,
                            issue=None,
                            first_page=None):
        """Serialize a single-query ``query_batch`` document.

        Only metadata fields with a truthy value are written. Non-string
        values (for example an integer ``year``) are converted to text.

        :param key: value of the ``key`` attribute of the ``query`` element.
        :param email_address: text of ``head/email_address``.
        :param doi_batch_id: text of ``head/doi_batch_id``.
        :returns: the document encoded as UTF-8, without an XML declaration
            (``query_to_crossref`` prepends one).
        """
        # NOTE(review): xsi:schemaLocation points at a developer-local file
        # path. It is kept byte-for-byte so the generated document does not
        # change; consider replacing it with the public XSD URL.
        batch = Element('query_batch', attrib={
            'version': '2.0',
            'xmlns': 'http://www.crossref.org/qschema/2.0',
            'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
            'xsi:schemaLocation': 'http://www.crossref.org/qschema/2.0 file:/Users/fabiobatalha/Trabalho/tmp/crossref/crossref_query_input2.0.xsd'})

        head = ElementTree.SubElement(batch, 'head')
        ElementTree.SubElement(head, 'email_address').text = email_address
        ElementTree.SubElement(head, 'doi_batch_id').text = doi_batch_id

        body = ElementTree.SubElement(batch, 'body')
        query = ElementTree.SubElement(body, 'query', {
            'enable-multiple-hits': 'false',
            'forward-match': 'false',
            'key': key})

        # (tag, value, attributes) in the order the children are emitted.
        # journal_title used to be built but never attached to the query, so
        # the argument had no effect; it is now written right after issn.
        fields = (
            ('issn', issn, {'match': 'optional'}),
            ('journal_title', journal_title, {'match': 'optional'}),
            ('author', author, {'match': 'optional'}),
            ('volume', volume, {'match': 'optional'}),
            ('issue', issue, {}),
            ('first_page', first_page, {'match': 'optional'}),
            ('year', year, {'match': 'optional'}),
            ('article_title', article_title, {'match': 'fuzzy'}),
        )
        for tag, value, attrs in fields:
            if not value:
                continue
            if not isinstance(value, self._TEXT_TYPES):
                # ElementTree refuses to serialize non-text values.
                value = str(value)
            ElementTree.SubElement(query, tag, attrs).text = value

        return ElementTree.tostring(batch, encoding='utf-8', method='xml')

    def query_to_crossref(self, query_xml, user=None, passwd=None):
        """POST a query document to the Crossref query servlet.

        :param query_xml: document produced by ``get_query_batch_xml``.
        :param user: sent as ``usr``; omitted from the request when ``None``.
        :param passwd: sent as ``pwd``; omitted from the request when ``None``.
        :returns: the raw response body.
        """
        # On Python 3 tostring() yields bytes; %-formatting those into a str
        # would embed "b'...'" in the payload, so decode first. On Python 2
        # bytes is str and the value is used as it is.
        if isinstance(query_xml, bytes) and not isinstance(query_xml, str):
            query_xml = query_xml.decode('utf-8')

        data = {'format': self._format,
                'qdata': '<?xml version = "1.0" encoding="utf-8"?>%s' % query_xml}
        # Missing credentials are left out instead of being sent as the
        # literal string "None".
        if user is not None:
            data['usr'] = user
        if passwd is not None:
            data['pwd'] = passwd

        payload = urlencode(data)
        if not isinstance(payload, bytes):
            # Python 3 requires the POST body to be bytes.
            payload = payload.encode('ascii')

        req = urllib2.Request("http://doi.crossref.org/servlet/query", payload)
        response = urllib2.urlopen(req)
        try:
            return response.read()
        finally:
            # Release the connection even if read() fails.
            response.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment