Skip to content

Instantly share code, notes, and snippets.

@epoz
Created August 1, 2012 08:29
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save epoz/3225004 to your computer and use it in GitHub Desktop.
Save epoz/3225004 to your computer and use it in GitHub Desktop.
Converting BNE Bibliography ntriples to BibJSON
import hashlib
import httplib
import json
import sys
from datetime import datetime

import ntriples
# Elasticsearch endpoint: "host:port" as handed to httplib.HTTPConnection,
# and the path prefix under which records and collections are PUT.
ES_URL = "localhost:9200"
ES_PATH = "/bibserver/"

# Namespace prefix shared by every ISBD element predicate listed below.
_ISBD_NS = 'http://iflastandards.info/ns/isbd/elements/'

# Predicate URI -> name of the record field its objects are collected under.
# Two ISBD elements (P1040 and P1045) both feed the 'note' field.
# Predicates that do not appear here are filed under 'misc' by ESSink.triple.
field_mapping = dict(
    (_ISBD_NS + element, field_name)
    for element, field_name in (
        ('P1004', 'title'),
        ('P1185', 'format'),
        ('P1007', 'contributor'),
        ('P1016', 'place'),
        ('P1117', 'related'),
        ('P1018', 'date'),
        ('P1022', 'extent'),
        ('P1024', 'dimension'),
        ('P1040', 'note'),
        ('P1045', 'note'),
    )
)
field_mapping['http://purl.org/dc/terms/language'] = 'language'
def post_data(data):
    """Stamp *data* with collection metadata and PUT it to Elasticsearch.

    ``data`` is modified in place: ``collection``, ``owner``, ``_created``
    and ``_last_modified`` are set (overwriting any existing values), then
    the dict is serialised as JSON and sent to
    ``ES_PATH + 'record/' + data['_id']`` on the ``ES_URL`` host.

    A non-2xx response is reported on stderr rather than raised, so one
    rejected record does not abort a long import.  Returns ``None``.

    Raises ``KeyError`` if ``data`` has no ``'_id'`` entry; connection-level
    errors from ``httplib`` propagate to the caller.
    """
    # Take the timestamp once so both fields are guaranteed to be identical
    # even when the call straddles a second boundary.
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    data['collection'] = 'ES'
    data['owner'] = 'test'
    data['_created'] = stamp
    data['_last_modified'] = stamp

    c = httplib.HTTPConnection(ES_URL)
    try:
        c.request('PUT', ES_PATH + 'record/' + data['_id'], json.dumps(data))
        # Read the reply: this waits until the server has answered and lets
        # us notice rejected documents instead of dropping them silently.
        response = c.getresponse()
        response.read()
        if not 200 <= response.status < 300:
            sys.stderr.write('PUT record/%s failed: %s %s\n'
                             % (data['_id'], response.status, response.reason))
    finally:
        # One connection is opened per record; close it explicitly so sockets
        # are not left for the garbage collector during large imports.
        c.close()
class ESSink(object):
    """Sink that groups triples by subject and posts one record per subject.

    An instance is passed to ``ntriples.NTriplesParser`` as its ``sink``;
    presumably the parser then calls ``triple()`` once per parsed statement
    (verify against the ntriples module).  Triples are expected to arrive
    grouped by subject: whenever the subject changes, the record collected
    so far is handed to ``post_data`` and a new one is started.

    The record for the final subject is still held in ``self.data`` when
    parsing ends; nothing in this class posts it.
    """

    def __init__(self, profile=False):
        # ``profile`` is accepted for interface compatibility; it is unused.
        self.data = {}            # record currently being accumulated
        self.last_id_seen = None  # subject of the most recent triple

    @staticmethod
    def _record_id(s):
        """Return a stable hex identifier derived from the subject *s*.

        The MD5 of the subject's UTF-8 bytes is used so that the same
        subject always maps to the same ``_id``.  ``id(s)`` is not suitable:
        it is a memory address that changes between runs and may be reused
        by a different object within one run.
        """
        raw = s if isinstance(s, bytes) else s.encode('utf-8')
        return hashlib.md5(raw).hexdigest()

    def triple(self, s, p, o):
        """Add object *o* to the current record under the field mapped from *p*.

        A change of subject first flushes the pending record via
        ``post_data``.  Triples whose subject is a blank node
        (``ntriples.bNode``) are not stored and do not start a record.
        Predicates missing from ``field_mapping`` go to the ``'misc'`` field.
        """
        is_blank = isinstance(s, ntriples.bNode)
        if s != self.last_id_seen:
            if self.data:
                post_data(self.data)
            self.last_id_seen = s
            # Leave the buffer empty for blank nodes so that no '_id'-only
            # stub record gets posted when the next subject arrives.
            self.data = {} if is_blank else {'_id': self._record_id(s)}
        if is_blank:
            return
        dest_field = field_mapping.get(p, 'misc')
        self.data.setdefault(dest_field, []).append(o)
# Make the Collection object.  The timestamp is taken once so that
# _created and _last_modified cannot differ across a second boundary.
created = datetime.now().strftime("%Y%m%d%H%M%S")
collection = {
    "owner": "test",
    "_created": created,
    "_last_modified": created,
    "_id": "04c19fa1e772ab66f0aad2efe61f25cd",
    "collection": "ES",
    "label": "Spanish National Library",
}
c = httplib.HTTPConnection(ES_URL)
try:
    c.request('PUT', ES_PATH + 'collection/' + collection['_id'],
              json.dumps(collection))
    # Drain the reply so the request is known to have been answered before
    # the connection is torn down.
    c.getresponse().read()
finally:
    c.close()

# Feed the N-Triples file named on the command line through the sink.
es_sink = ESSink()
parser = ntriples.NTriplesParser(sink=es_sink)
with open(sys.argv[1]) as triples_file:
    sink = parser.parse(triples_file)

# ESSink only posts a record when the *next* subject shows up, so the record
# for the last subject in the file is still pending here; post it explicitly.
if es_sink.data:
    post_data(es_sink.data)
<http://datos.bne.es/resource/bima0000000003> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://iflastandards.info/ns/fr/frbr/frbrer/C1003> .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1004> "A don Gaspar de Guzman, conde de Olivares, duque de Sanlucar la Mayor ..." .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1185> "[Texto impreso]" .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1007> "por Sevilla don Diego Hurtado de Mendoza, Cavallero de la Orden de Santiago, Vizconde de la Corzana ..." .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1016> "[Sevilla] Impresso por mandado de su Se\u00F1oria en Sevill" .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1117> "por Francisco de Lyra" .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1018> "1631" .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1022> "4 h." .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1024> "Fol." .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1040> "Sign.: A\u00B14\u00F7" .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1045> "En port. grab. xil.: esc. con un rey y dos obispos" .
<http://datos.bne.es/resource/bima0000000003> <http://purl.org/dc/terms/language> <http://lexvo.org/id/iso639-3/spa> .
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment