# fabiobatalha/finddoi.py

## finddoi.py
# encoding: utf-8
import urllib2
from xml.etree import ElementTree
from xml.etree.ElementTree import Element
from urllib import urlencode


class FindDoi(object):
    """Build CrossRef query XML, post it to the query servlet, read back the DOI.

    An instance is bound to one response format. That format is sent with
    every request and decides where ``extract_doi_from_xml`` looks for the
    DOI element in the reply.
    """

    # Response formats accepted by the constructor.
    _ALLOWED_FORMATS = ('unixref', 'unixsd', 'xsd_xml')

    # ElementTree search path of the DOI element, per response format.
    _DOI_PATHS = {
        'unixref': ".//doi_data/doi",
        'unixsd': ".//{http://www.crossref.org/qrschema/3.0}doi",
        'xsd_xml': ".//{http://www.crossref.org/qrschema/2.0}doi",
    }

    def __init__(self, format='unixref'):
        """Store the response format to request.

        :param format: ``'unixref'``, ``'unixsd'`` or ``'xsd_xml'``.
        :raises ValueError: if ``format`` is not one of the allowed values.
        """
        if format not in self._ALLOWED_FORMATS:
            # The one-element tuple keeps a tuple-valued argument from being
            # unpacked by the % operator.
            raise ValueError('format %s not allowed' % (format,))
        self._format = format

    def extract_doi_from_xml(self, xml):
        """Return the text of the DOI element found in ``xml``.

        :param xml: response document as a string; it is parsed with
            ``ElementTree.fromstring`` and parse errors propagate.
        :returns: the DOI element's text, or ``None`` when the document has
            no DOI element at the path used for this instance's format.
        :raises ValueError: if the stored format has no known DOI path.
        """
        try:
            doi_path = self._DOI_PATHS[self._format]
        except KeyError:
            raise ValueError('format %s not allowed' % (self._format,))

        doi_node = ElementTree.fromstring(xml).find(doi_path)
        if doi_node is None:
            return None
        return doi_node.text

    def get_query_batch_xml(self,
                            key='any',
                            email_address="crossref@crossref.org",
                            doi_batch_id="crossref",
                            issn=None,
                            journal_title=None,
                            article_title=None,
                            author=None,
                            year=None,
                            volume=None,
                            issue=None,
                            first_page=None):
        """Serialise a ``query_batch`` document holding a single query.

        Search criteria that are ``None`` or empty are left out of the query.

        :param key: value of the ``key`` attribute on the ``query`` element.
        :param email_address: text of ``head/email_address``.
        :param doi_batch_id: text of ``head/doi_batch_id``.
        :param issn: journal ISSN (``match="optional"``).
        :param journal_title: journal title (``match="optional"``).
        :param article_title: article title (``match="fuzzy"``).
        :param author: author name (``match="optional"``).
        :param year: publication year (``match="optional"``).
        :param volume: volume (``match="optional"``).
        :param issue: issue (no ``match`` attribute).
        :param first_page: first page (``match="optional"``).
        :returns: the ``query_batch`` element serialised by
            ``ElementTree.tostring`` with ``encoding='utf-8'``.
        """
        # NOTE(review): the schemaLocation hint points at a file on a
        # developer machine; kept as-is so the emitted XML does not change.
        batch = Element('query_batch', attrib={
            'version': '2.0',
            'xmlns': 'http://www.crossref.org/qschema/2.0',
            'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
            'xsi:schemaLocation': 'http://www.crossref.org/qschema/2.0 file:/Users/fabiobatalha/Trabalho/tmp/crossref/crossref_query_input2.0.xsd'})

        head = ElementTree.SubElement(batch, 'head')
        ElementTree.SubElement(head, 'email_address').text = email_address
        ElementTree.SubElement(head, 'doi_batch_id').text = doi_batch_id

        body = ElementTree.SubElement(batch, 'body')
        query = ElementTree.SubElement(body, 'query',
                                       attrib={'enable-multiple-hits': 'false',
                                               'forward-match': 'false',
                                               'key': key})

        # (tag, attributes, value) in emission order. journal_title used to be
        # built but never attached to the query; it now follows issn.
        # NOTE(review): confirm this position against the CrossRef query schema.
        criteria = (
            ('issn', {'match': 'optional'}, issn),
            ('journal_title', {'match': 'optional'}, journal_title),
            ('author', {'match': 'optional'}, author),
            ('volume', {'match': 'optional'}, volume),
            ('issue', {}, issue),
            ('first_page', {'match': 'optional'}, first_page),
            ('year', {'match': 'optional'}, year),
            ('article_title', {'match': 'fuzzy'}, article_title),
        )
        for tag, tag_attrib, value in criteria:
            if value:
                ElementTree.SubElement(query, tag, attrib=tag_attrib).text = value

        return ElementTree.tostring(batch, encoding='utf-8', method='xml')

    def query_to_crossref(self, query_xml, user=None, passwd=None):
        """POST ``query_xml`` to the CrossRef query servlet and return the reply.

        :param query_xml: query document, e.g. from ``get_query_batch_xml``;
            an XML declaration is prepended before sending.
        :param user: value for the ``usr`` form field; omitted when ``None``.
        :param passwd: value for the ``pwd`` form field; omitted when ``None``.
        :returns: the raw response body.
        """
        # Local import: keeps this block self-contained without touching the
        # module's import section.
        from contextlib import closing

        data = {'format': self._format,
                'qdata': '<?xml version = "1.0" encoding="utf-8"?>%s' % query_xml}
        # urlencode would turn None into the literal text "None", so unset
        # credentials are left out of the form instead.
        if user is not None:
            data['usr'] = user
        if passwd is not None:
            data['pwd'] = passwd

        req = urllib2.Request("http://doi.crossref.org/servlet/query", urlencode(data))

        # Close the HTTP response explicitly once the body has been read.
        with closing(urllib2.urlopen(req)) as response:
            return response.read()
	# encoding: utf-8
	import urllib2
	from xml.etree import ElementTree
	from xml.etree.ElementTree import Element
	from urllib import urlencode


	class FindDoi(object):
	    """Build CrossRef query XML, post it to the query servlet, read back the DOI.

	    An instance is bound to one response format. That format is sent with
	    every request and decides where ``extract_doi_from_xml`` looks for the
	    DOI element in the reply.
	    """

	    # Response formats accepted by the constructor.
	    _ALLOWED_FORMATS = ('unixref', 'unixsd', 'xsd_xml')

	    # ElementTree search path of the DOI element, per response format.
	    _DOI_PATHS = {
	        'unixref': ".//doi_data/doi",
	        'unixsd': ".//{http://www.crossref.org/qrschema/3.0}doi",
	        'xsd_xml': ".//{http://www.crossref.org/qrschema/2.0}doi",
	    }

	    def __init__(self, format='unixref'):
	        """Store the response format to request.

	        :param format: ``'unixref'``, ``'unixsd'`` or ``'xsd_xml'``.
	        :raises ValueError: if ``format`` is not one of the allowed values.
	        """
	        if format not in self._ALLOWED_FORMATS:
	            # The one-element tuple keeps a tuple-valued argument from being
	            # unpacked by the % operator.
	            raise ValueError('format %s not allowed' % (format,))
	        self._format = format

	    def extract_doi_from_xml(self, xml):
	        """Return the text of the DOI element found in ``xml``.

	        :param xml: response document as a string; it is parsed with
	            ``ElementTree.fromstring`` and parse errors propagate.
	        :returns: the DOI element's text, or ``None`` when the document has
	            no DOI element at the path used for this instance's format.
	        :raises ValueError: if the stored format has no known DOI path.
	        """
	        try:
	            doi_path = self._DOI_PATHS[self._format]
	        except KeyError:
	            raise ValueError('format %s not allowed' % (self._format,))

	        doi_node = ElementTree.fromstring(xml).find(doi_path)
	        if doi_node is None:
	            return None
	        return doi_node.text

	    def get_query_batch_xml(self,
	                            key='any',
	                            email_address="crossref@crossref.org",
	                            doi_batch_id="crossref",
	                            issn=None,
	                            journal_title=None,
	                            article_title=None,
	                            author=None,
	                            year=None,
	                            volume=None,
	                            issue=None,
	                            first_page=None):
	        """Serialise a ``query_batch`` document holding a single query.

	        Search criteria that are ``None`` or empty are left out of the query.

	        :param key: value of the ``key`` attribute on the ``query`` element.
	        :param email_address: text of ``head/email_address``.
	        :param doi_batch_id: text of ``head/doi_batch_id``.
	        :param issn: journal ISSN (``match="optional"``).
	        :param journal_title: journal title (``match="optional"``).
	        :param article_title: article title (``match="fuzzy"``).
	        :param author: author name (``match="optional"``).
	        :param year: publication year (``match="optional"``).
	        :param volume: volume (``match="optional"``).
	        :param issue: issue (no ``match`` attribute).
	        :param first_page: first page (``match="optional"``).
	        :returns: the ``query_batch`` element serialised by
	            ``ElementTree.tostring`` with ``encoding='utf-8'``.
	        """
	        # NOTE(review): the schemaLocation hint points at a file on a
	        # developer machine; kept as-is so the emitted XML does not change.
	        batch = Element('query_batch', attrib={
	            'version': '2.0',
	            'xmlns': 'http://www.crossref.org/qschema/2.0',
	            'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
	            'xsi:schemaLocation': 'http://www.crossref.org/qschema/2.0 file:/Users/fabiobatalha/Trabalho/tmp/crossref/crossref_query_input2.0.xsd'})

	        head = ElementTree.SubElement(batch, 'head')
	        ElementTree.SubElement(head, 'email_address').text = email_address
	        ElementTree.SubElement(head, 'doi_batch_id').text = doi_batch_id

	        body = ElementTree.SubElement(batch, 'body')
	        query = ElementTree.SubElement(body, 'query',
	                                       attrib={'enable-multiple-hits': 'false',
	                                               'forward-match': 'false',
	                                               'key': key})

	        # (tag, attributes, value) in emission order. journal_title used to be
	        # built but never attached to the query; it now follows issn.
	        # NOTE(review): confirm this position against the CrossRef query schema.
	        criteria = (
	            ('issn', {'match': 'optional'}, issn),
	            ('journal_title', {'match': 'optional'}, journal_title),
	            ('author', {'match': 'optional'}, author),
	            ('volume', {'match': 'optional'}, volume),
	            ('issue', {}, issue),
	            ('first_page', {'match': 'optional'}, first_page),
	            ('year', {'match': 'optional'}, year),
	            ('article_title', {'match': 'fuzzy'}, article_title),
	        )
	        for tag, tag_attrib, value in criteria:
	            if value:
	                ElementTree.SubElement(query, tag, attrib=tag_attrib).text = value

	        return ElementTree.tostring(batch, encoding='utf-8', method='xml')

	    def query_to_crossref(self, query_xml, user=None, passwd=None):
	        """POST ``query_xml`` to the CrossRef query servlet and return the reply.

	        :param query_xml: query document, e.g. from ``get_query_batch_xml``;
	            an XML declaration is prepended before sending.
	        :param user: value for the ``usr`` form field; omitted when ``None``.
	        :param passwd: value for the ``pwd`` form field; omitted when ``None``.
	        :returns: the raw response body.
	        """
	        # Local import: keeps this block self-contained without touching the
	        # module's import section.
	        from contextlib import closing

	        data = {'format': self._format,
	                'qdata': '<?xml version = "1.0" encoding="utf-8"?>%s' % query_xml}
	        # urlencode would turn None into the literal text "None", so unset
	        # credentials are left out of the form instead.
	        if user is not None:
	            data['usr'] = user
	        if passwd is not None:
	            data['pwd'] = passwd

	        req = urllib2.Request("http://doi.crossref.org/servlet/query", urlencode(data))

	        # Close the HTTP response explicitly once the body has been read.
	        with closing(urllib2.urlopen(req)) as response:
	            return response.read()