Skip to content

Instantly share code, notes, and snippets.

@jamesoutterside
Created September 17, 2012 10:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jamesoutterside/05eca8f5995c16eab0ca to your computer and use it in GitHub Desktop.
Save jamesoutterside/05eca8f5995c16eab0ca to your computer and use it in GitHub Desktop.
Script to export DLMs oai resources to an LR node
# Script to export DLMs OAI-PMH resources to a Learning Registry (LR) node.
import urllib2
import json
import settings
from oaipmh.client import Client
from oaipmh.metadata import MetadataRegistry, oai_dc_reader
# When True, main() publishes to the sandbox.learningregistry.org node;
# when False it publishes to the alpha.mimas.ac.uk node instead.
DEBUG = True
def convert_to_envelope(doc, rawMetadata):
    """Wrap one raw OAI record in a resource-data envelope dict.

    Args:
        doc: Parsed metadata for the record. Currently unused: every field
            of the envelope is either a constant or comes from
            ``rawMetadata``. The argument is left untouched.
        rawMetadata: The record payload; stored verbatim under the
            ``"resource_data"`` key.

    Returns:
        A newly built dict describing the envelope.
    """
    # TODO: add code here to create the document from the Dublin Core
    # metadata held in ``doc``.
    # NOTE(review): "resource_locator" ('location') and "publishing_node"
    # ('local') look like template placeholders -- presumably they should be
    # derived from the record/target node; confirm before publishing for real.
    envelope = {
        "doc_type": 'resource_data',
        "doc_version": "0.23.0",
        "active": True,
        "resource_data_type": "metadata",
        "identity": {
            "submitter_type": "user",
            "submitter": "Newcastle University",
            "curator": "Newcastle University",
            "owner": "Newcastle University",
        },
        "TOS": {
            "submission_TOS": "http://www.learningregistry.org/tos/cc0/v0-5/"
        },
        "resource_locator": 'location',
        "keys": ["DC"],
        "payload_placement": "inline",
        "payload_schema": ["oai_dc"],
        "resource_data": rawMetadata,
        "publishing_node": 'local',
    }
    return envelope
def acquire_and_publish_documents(oai_url, publish_url, reader, prefix,
                                  batch_size=10):
    """Harvest every record from an OAI endpoint and publish it in batches.

    Each record listed by the endpoint is re-fetched with a ``GetRecord``
    request so the raw response body can be embedded in its envelope.
    Envelopes are handed to ``publish_documents`` whenever ``batch_size`` of
    them have accumulated, and any remainder is published at the end.

    Args:
        oai_url: Base URL of the OAI endpoint to harvest.
        publish_url: URL the envelopes are POSTed to.
        reader: Metadata reader registered for ``prefix``.
        prefix: Metadata prefix used for both listing and fetching records.
        batch_size: Number of envelopes per publish request (default 10,
            the previously hard-coded value). Values below 1 behave like 1.
    """
    registry = MetadataRegistry()
    registry.registerReader(prefix, reader)
    client = Client(oai_url, registry)

    documents = []
    for record in client.listRecords(metadataPrefix=prefix):
        header = record[0]
        metadata = record[1]
        # NOTE(review): records flagged as deleted are not filtered out here;
        # confirm whether they should be skipped before publishing.
        record_url = "{0}?verb=GetRecord&metadataPrefix={1}&identifier={2}".format(
            oai_url, prefix, header.identifier())
        # urllib2 responses are not context managers, so close explicitly.
        response = urllib2.urlopen(record_url)
        try:
            rawMetadata = response.read()
        finally:
            response.close()

        value = convert_to_envelope(metadata, rawMetadata)
        if value is None:
            continue
        documents.append(value)
        if len(documents) >= batch_size:
            publish_documents(publish_url, documents)
            documents = []

    # Only flush a remainder; avoids POSTing an empty batch when the record
    # count is zero or an exact multiple of the batch size.
    if documents:
        publish_documents(publish_url, documents)
def publish_documents(publish_url, documents):
    """POST a batch of envelopes as JSON and append the reply to output.log.

    Args:
        publish_url: URL to send the request to.
        documents: List of envelope dicts; sent as ``{"documents": [...]}``.

    Raises:
        Whatever ``urllib2.urlopen`` raises if the request fails; in that
        case nothing is written to the log.
    """
    data = {'documents': documents}
    headers = {"Content-Type": "application/json"}
    req = urllib2.Request(publish_url, json.dumps(data), headers)

    # Read and close the HTTP response before touching the log file, so the
    # connection is always released and a failed request leaves no log entry.
    response = urllib2.urlopen(req)
    try:
        body = response.read()
    finally:
        response.close()

    with open("output.log", "a") as f:
        f.write(body)
def main():
    """Export the Bangor OAI feed to the node selected by ``DEBUG``."""
    # Sandbox node while debugging, otherwise the alpha.mimas.ac.uk node.
    publish_url = (
        'http://sandbox.learningregistry.org/publish'
        if DEBUG
        else 'http://alpha.mimas.ac.uk/publish'
    )
    acquire_and_publish_documents(
        'https://learning-maps.ncl.ac.uk/bangor/resources/export/oai/',
        publish_url,
        oai_dc_reader,
        'oai_dc',
    )
# Run the export only when this file is executed directly as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment