public
Last active

Converting BNE Bibliography ntriples to BibJSON

  • Download Gist
convert.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
import hashlib
import httplib
import json
import sys
from datetime import datetime

import ntriples
 
# Elasticsearch endpoint: "host:port" handed to the HTTP connection, and the
# path prefix under which record and collection URLs are built.
ES_URL = "localhost:9200"
ES_PATH = "/bibserver/"

# Predicate URI -> name of the record field its objects are collected under.
# Two predicates (P1040 and P1045) intentionally share the 'note' field.
field_mapping = dict((
    ('http://iflastandards.info/ns/isbd/elements/P1004', 'title'),
    ('http://iflastandards.info/ns/isbd/elements/P1185', 'format'),
    ('http://iflastandards.info/ns/isbd/elements/P1007', 'contributor'),
    ('http://iflastandards.info/ns/isbd/elements/P1016', 'place'),
    ('http://iflastandards.info/ns/isbd/elements/P1117', 'related'),
    ('http://iflastandards.info/ns/isbd/elements/P1018', 'date'),
    ('http://iflastandards.info/ns/isbd/elements/P1022', 'extent'),
    ('http://iflastandards.info/ns/isbd/elements/P1024', 'dimension'),
    ('http://iflastandards.info/ns/isbd/elements/P1040', 'note'),
    ('http://iflastandards.info/ns/isbd/elements/P1045', 'note'),
    ('http://purl.org/dc/terms/language', 'language'),
))
 
def post_data(data):
    """PUT one record to Elasticsearch as JSON.

    ``data`` is stamped in place with the ``collection``, ``owner``,
    ``_created`` and ``_last_modified`` fields and then sent to
    ``ES_PATH + 'record/' + data['_id']`` on the host named by ``ES_URL``.

    Args:
        data: dict describing the record. It must already contain an
            ``'_id'`` string; the dict is mutated by this call.

    Raises:
        KeyError: if ``data`` has no ``'_id'`` entry.
    """
    data['collection'] = 'ES'
    data['owner'] = 'test'
    # Read the clock once so both stamps of a new record are always equal,
    # even when the call straddles a second boundary.
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    data['_created'] = timestamp
    data['_last_modified'] = timestamp

    c = httplib.HTTPConnection(ES_URL)
    try:
        c.request('PUT', ES_PATH + 'record/' + data['_id'], json.dumps(data))
        # Consume the reply so the exchange is complete before the socket is
        # closed, and so a rejected record does not go unnoticed.
        response = c.getresponse()
        response.read()
        if response.status >= 400:
            sys.stderr.write('PUT of record %s failed: %d %s\n'
                             % (data['_id'], response.status, response.reason))
    finally:
        # Release the socket explicitly instead of waiting for collection.
        c.close()
 
class ESSink(object):
    """Parser sink that groups consecutive triples into one record per subject.

    Meant to be passed as the ``sink`` of ``ntriples.NTriplesParser``, which
    is expected to call :meth:`triple` for every statement it parses.
    Triples that share a subject are accumulated in ``self.data``; when a
    different subject arrives, the finished record is handed to
    ``post_data``.  The record of the last subject is still held in
    ``self.data`` once parsing ends, so whoever drives the parser has to
    post it afterwards.
    """

    def __init__(self, profile=False):
        """Start with an empty record; ``profile`` is accepted but unused."""
        self.data = {}
        self.last_id_seen = None

    def triple(self, s, p, o):
        """Add the triple ``(s, p, o)`` to the record of subject ``s``.

        Args:
            s: subject node; triples whose subject is a blank node are
                ignored.
            p: predicate URI, looked up in ``field_mapping`` to choose the
                destination field (``'misc'`` when it is not listed).
            o: object value, appended to the destination field's list.
        """
        # Blank nodes are not records.  Bail out before touching any state so
        # they neither flush the record in progress nor leave behind an empty
        # record that would be posted with nothing but an '_id'.
        if isinstance(s, ntriples.bNode):
            return

        if s != self.last_id_seen:
            if self.data:
                post_data(self.data)
            self.last_id_seen = s
            # Derive the id from the subject itself.  id(s) is a memory
            # address: it differs between runs and can be reused by a later
            # subject object, which would overwrite an unrelated record.
            # MD5 serves only as a stable fingerprint here, not for security.
            key = s if isinstance(s, bytes) else s.encode('utf-8')
            self.data = {'_id': hashlib.md5(key).hexdigest()}

        dest_field = field_mapping.get(p, 'misc')
        self.data.setdefault(dest_field, []).append(o)
 
# Make the Collection object
# Read the clock once so the created/modified stamps cannot disagree.
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
collection = {
    "owner": "test",
    "_created": timestamp,
    "_last_modified": timestamp,
    "_id": "04c19fa1e772ab66f0aad2efe61f25cd",
    "collection": "ES",
    "label": "Spanish National Library",
}
c = httplib.HTTPConnection(ES_URL)
try:
    c.request('PUT', ES_PATH + 'collection/' + collection['_id'],
              json.dumps(collection))
    # Consume the reply so the exchange is complete before closing.
    c.getresponse().read()
finally:
    c.close()

# Stream the N-Triples file named on the command line through the sink.
# The sink is created here, rather than taken from parse()'s return value,
# so the final flush below does not depend on what parse() returns.
sink = ESSink()
parser = ntriples.NTriplesParser(sink=sink)
with open(sys.argv[1]) as source:
    parser.parse(source)

# The sink only posts a record when the *next* subject shows up, so the
# record of the last subject in the file is still pending here.  Without
# this step it would never be uploaded (a single-subject file would upload
# nothing at all).
if sink.data:
    post_data(sink.data)
sample.nt
1 2 3 4 5 6 7 8 9 10 11 12
<http://datos.bne.es/resource/bima0000000003> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://iflastandards.info/ns/fr/frbr/frbrer/C1003> .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1004> "A don Gaspar de Guzman, conde de Olivares, duque de Sanlucar la Mayor ..." .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1185> "[Texto impreso]" .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1007> "por Sevilla don Diego Hurtado de Mendoza, Cavallero de la Orden de Santiago, Vizconde de la Corzana ..." .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1016> "[Sevilla] Impresso por mandado de su Se\u00F1oria en Sevill" .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1117> "por Francisco de Lyra" .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1018> "1631" .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1022> "4 h." .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1024> "Fol." .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1040> "Sign.: A\u00B14\u00F7" .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1045> "En port. grab. xil.: esc. con un rey y dos obispos" .
<http://datos.bne.es/resource/bima0000000003> <http://purl.org/dc/terms/language> <http://lexvo.org/id/iso639-3/spa> .

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.