Skip to content

Instantly share code, notes, and snippets.

@frederik-elwert
Created July 31, 2013 09:49
Show Gist options
  • Save frederik-elwert/6120801 to your computer and use it in GitHub Desktop.
Save frederik-elwert/6120801 to your computer and use it in GitHub Desktop.
Get ISOcat closed DC with all referenced simple DCs
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import logging
import sys
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import urlopen, Request

from lxml import etree
# REST endpoint prefix; a data category id is appended directly to it.
BASE_URL = 'https://catalog.clarin.eu/isocat/rest/dc/'
# Command-line options that are forwarded as URL query parameters.
VARIABLES = ('profile', 'workingLanguage', 'objectLanguage')
# XML namespace of the DCIF documents returned by the service.
DCIF = 'http://www.isocat.org/ns/dcif'
# Namespace in "{uri}" form, prepended to tag names for ElementTree lookups.
P_DCIF = '{%s}' % DCIF
# Prefix-to-namespace mapping handed to XPath queries.
NS = dict(dcif=DCIF)
def url_from_args(args):
    """Build the request URL for the data category described by *args*.

    Every attribute of *args* whose name is listed in ``VARIABLES`` and whose
    value is truthy becomes a query parameter; ``args.id`` is appended to
    ``BASE_URL`` as the path component.

    Args:
        args: Namespace-like object with an ``id`` attribute and, optionally,
            attributes named after the entries of ``VARIABLES``.

    Returns:
        The URL as a string, in the form ``<BASE_URL><id>?<query>``.
    """
    params = {
        name: setting
        for name, setting in vars(args).items()
        if name in VARIABLES and setting
    }
    query_string = urlencode(params)
    return '{}{}?{}'.format(BASE_URL, args.id, query_string)
def get_datcat(args):
    """Download one data category and return the raw response body.

    The URL is derived from *args* with ``url_from_args`` and requested with
    an ``Accept: application/x-dcif+xml`` header.

    Args:
        args: Namespace-like object accepted by ``url_from_args``.

    Returns:
        The response body as ``bytes``.

    Raises:
        SystemExit: With status -1 when the request fails or the server
            answers with a status other than 200. The failure is logged
            before exiting.
    """
    query_url = url_from_args(args)
    logging.debug('Requesting {}'.format(query_url))
    request = Request(query_url, headers={'Accept': 'application/x-dcif+xml'})
    try:
        # The context manager closes the connection even if read() fails.
        with urlopen(request) as response:
            if response.status != 200:
                logging.error('Could not get "{}", error {} {}.'.format(
                    query_url, response.status, response.reason))
                sys.exit(-1)
            return response.read()
    except HTTPError as err:
        # urlopen() raises for 4xx/5xx responses rather than returning them,
        # so the status check above never sees those; report them here.
        logging.error('Could not get "{}", error {} {}.'.format(
            query_url, err.code, err.reason))
        sys.exit(-1)
    except URLError as err:
        # Connection-level failure: there is no HTTP status to report.
        logging.error('Could not get "{}", error {}.'.format(
            query_url, err.reason))
        sys.exit(-1)
def save_datcat(args):
    """Write a data category together with the data categories it references.

    The document for ``args.id`` is fetched first. For every ``pid`` found on
    a ``conceptualDomain/value`` element that has no matching ``dataCategory``
    element in the document yet, the referenced data category is fetched and
    appended to the document root. The combined document is then written to
    ``args.outfile`` as UTF-8 with an XML declaration.

    Args:
        args: Parsed command-line namespace. ``id`` and the attributes named
            in ``VARIABLES`` select what is requested; ``outfile`` is the
            binary file object the result is written to. The namespace is
            not modified.
    """
    logging.info('Request data category with arguments {}'.format(args))
    selection = etree.fromstring(get_datcat(args))
    referenced_pids = selection.xpath(
        '//dcif:conceptualDomain/dcif:value/@pid', namespaces=NS)
    for pid in referenced_pids:
        if selection.xpath('//dcif:dataCategory[@pid = $pid][1]', pid=pid,
                           namespaces=NS):
            # dataCategory already present, do nothing
            continue
        # Query through a copy so the caller's namespace keeps its own id.
        child_args = argparse.Namespace(**vars(args))
        # The request id is the part of the pid after the first hyphen.
        child_args.id = pid.split('-')[1]
        child_selection = etree.fromstring(get_datcat(child_args))
        datcat = child_selection.find(P_DCIF + 'dataCategory')
        if datcat is None:
            # find() yields None when the response has no dataCategory child;
            # appending None would raise TypeError and lose all output.
            logging.error(
                'No dataCategory element in response for "{}".'.format(pid))
            continue
        selection.append(datcat)
    etree.ElementTree(selection).write(args.outfile, encoding='utf-8',
                                       xml_declaration=True)
def main(argv=None):
    """Parse the command line, configure logging and save the data category.

    Args:
        argv: Argument list to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, which is what the script entry point uses.

    Returns:
        0 after the document has been written.
    """
    # Parse commandline arguments
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('-v', '--verbose', action='store_true')
    arg_parser.add_argument('-o', '--outfile', default=sys.stdout.buffer,
                            type=argparse.FileType('wb'))
    for var in VARIABLES:
        arg_parser.add_argument('--' + var)
    arg_parser.add_argument('id')
    args = arg_parser.parse_args(argv)
    # Set up logging
    level = logging.DEBUG if args.verbose else logging.ERROR
    logging.basicConfig(level=level)
    try:
        save_datcat(args)
    finally:
        # argparse opened the output file, so nothing else will close it.
        # Standard output is only flushed: closing it would break any later
        # writes by the interpreter.
        stdout_streams = (sys.stdout, getattr(sys.stdout, 'buffer', None))
        if any(args.outfile is stream for stream in stdout_streams):
            args.outfile.flush()
        else:
            args.outfile.close()
    # Return exit value
    return 0
# Run only when executed as a script; main()'s return value becomes the
# process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment