# Script to query OpenDOAR for science sites in English that allow free access to their metadata; parses the XML response and prints selected fields as JSON.
import urllib2
import xml.etree.ElementTree as etree
import json
# Query for sites in English that have science data, an OAI URL, and allow
# free access to metadata.
# NOTE(review): the URL literal below has no scheme or host and appears to be
# truncated (it begins in the middle of a query string), so urlopen() will
# reject it as written. It is kept verbatim because the missing prefix cannot
# be recovered from this file -- restore the full OpenDOAR API URL before use.
resp = urllib2.urlopen(',12&subject=Ca,Ce,Cur,Cuv')
try:
    # Read the whole XML response body so it can be parsed from a string.
    data = resp.read()
finally:
    # Release the connection even if reading the body fails.
    resp.close()
tree = etree.fromstring(data)

# Child elements of each <repository> that are copied into the output.
wanted_tags = ('rUrl', 'rOaiBaseUrl', 'rName')

l = []
for child in tree.iter('repository'):
    d = {}
    for item in child:
        if item.tag in wanted_tags:
            d[item.tag] = item.text
    # Collect the record; without this the printed list would always be empty.
    l.append(d)

# Prints a list of all returned repositories' names, URLs, OAI URLs in JSON
# format (list of dicts). The parenthesised call is valid both as a Python 2
# print statement and as the Python 3 print function.
print(json.dumps(l))
