Converting BNE Bibliography ntriples to BibJSON
import hashlib
import httplib
import json
import sys
from datetime import datetime

import ntriples
# "host:port" handed to httplib.HTTPConnection for every request.
ES_URL = "localhost:9200"
# Path prefix prepended to the 'record/<id>' and 'collection/<id>' request paths.
ES_PATH = "/bibserver/"

# Namespace shared by all ISBD element predicates listed below.
_ISBD_NS = 'http://iflastandards.info/ns/isbd/elements/'

# (ISBD element code, destination record field) pairs.
_ISBD_FIELDS = (
    ('P1004', 'title'),
    ('P1185', 'format'),
    ('P1007', 'contributor'),
    ('P1016', 'place'),
    ('P1117', 'related'),
    ('P1018', 'date'),
    ('P1022', 'extent'),
    ('P1024', 'dimension'),
    ('P1040', 'note'),
    ('P1045', 'note'),
)

# Maps a full predicate URI to the record field its objects are stored under.
# Predicates that are not listed here are filed under 'misc' by the sink.
field_mapping = dict((_ISBD_NS + code, field) for code, field in _ISBD_FIELDS)
field_mapping['http://purl.org/dc/terms/language'] = 'language'
def post_data(data):
    """PUT one record to the server at ES_URL under ES_PATH + 'record/<_id>'.

    The dict is stamped in place with the fixed 'collection' and 'owner'
    values and with creation / modification timestamps, then serialised
    as JSON and sent as the request body.

    Args:
        data: dict describing the record. Must already contain an '_id'
            key whose value is a string; it becomes the last path segment.

    Raises:
        KeyError: if data has no '_id' key.
    """
    # Take the timestamp once so both fields are guaranteed to be identical.
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    data['collection'] = 'ES'
    data['owner'] = 'test'
    data['_created'] = stamp
    data['_last_modified'] = stamp

    conn = httplib.HTTPConnection(ES_URL)
    try:
        conn.request('PUT', ES_PATH + 'record/' + data['_id'], json.dumps(data))
        # Drain the response so the exchange is completed before the socket
        # is released; the status is deliberately not checked (best effort,
        # as before).
        conn.getresponse().read()
    finally:
        # Without this every posted record leaves an open socket behind.
        conn.close()
class ESSink(object):
    """Parser sink that groups consecutive triples by subject into records.

    Triples sharing a subject are accumulated into one dict, with each
    predicate translated through field_mapping (unknown predicates go to
    'misc') and every object appended to that field's list. When a triple
    with a different subject arrives, the accumulated dict is handed to
    post_data and a new one is started.

    The record for the final subject is still held in self.data after the
    last triple has been delivered; the caller is responsible for posting it.
    """

    def __init__(self, profile=False):
        # `profile` is accepted but not used anywhere in this class.
        self.data = {}
        self.last_id_seen = None

    @staticmethod
    def _record_id(subject):
        """Return a stable hex id derived from the subject's text.

        The previous scheme used id(subject), i.e. the object's memory
        address. Addresses are reused once an object is freed, so a later
        subject could receive the id of an earlier one and overwrite its
        record. A digest of the subject text is deterministic and differs
        between runs only if the subject itself differs.
        """
        raw = subject if isinstance(subject, bytes) else subject.encode('utf-8')
        return hashlib.md5(raw).hexdigest()

    def triple(self, s, p, o):
        """Add one (subject, predicate, object) triple to the current record.

        Args:
            s: subject of the triple; triples whose subject is an
                ntriples.bNode are ignored.
            p: predicate, looked up in field_mapping.
            o: object, appended to the list stored under the mapped field.
        """
        # Skip blank nodes before any bookkeeping so they neither split the
        # record being built nor leave an '_id'-only record to be posted.
        if isinstance(s, ntriples.bNode):
            return

        if s != self.last_id_seen:
            # Subject changed: ship the finished record and start a new one.
            if self.data:
                post_data(self.data)
            self.data = {'_id': self._record_id(s)}
            self.last_id_seen = s

        dest_field = field_mapping.get(p, 'misc')
        self.data.setdefault(dest_field, []).append(o)
# Make the Collection object and register it with the server before any
# records are posted.
_collection_stamp = datetime.now().strftime("%Y%m%d%H%M%S")
collection = {
    "owner": "test",
    "_created": _collection_stamp,
    "_last_modified": _collection_stamp,
    "_id": "04c19fa1e772ab66f0aad2efe61f25cd",
    "collection": "ES",
    "label": "Spanish National Library",
}
c = httplib.HTTPConnection(ES_URL)
try:
    c.request('PUT', ES_PATH + 'collection/' + collection['_id'], json.dumps(collection))
    # Read the reply so the request is fully completed before closing.
    c.getresponse().read()
finally:
    c.close()

# Stream the N-Triples file named on the command line through the sink.
# A direct reference to the sink is kept so the trailing record can be
# flushed without relying on what parse() returns.
_es_sink = ESSink()
parser = ntriples.NTriplesParser(sink=_es_sink)
with open(sys.argv[1]) as _triples_file:
    sink = parser.parse(_triples_file)

# ESSink only posts a record when it sees the *next* subject, so the record
# for the last subject in the file is still buffered here; post it now.
if _es_sink.data:
    post_data(_es_sink.data)
<http://datos.bne.es/resource/bima0000000003> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://iflastandards.info/ns/fr/frbr/frbrer/C1003> . | |
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1004> "A don Gaspar de Guzman, conde de Olivares, duque de Sanlucar la Mayor ..." . | |
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1185> "[Texto impreso]" . | |
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1007> "por Sevilla don Diego Hurtado de Mendoza, Cavallero de la Orden de Santiago, Vizconde de la Corzana ..." . | |
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1016> "[Sevilla] Impresso por mandado de su Se\u00F1oria en Sevill" . | |
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1117> "por Francisco de Lyra" . | |
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1018> "1631" . | |
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1022> "4 h." . | |
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1024> "Fol." . | |
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1040> "Sign.: A\u00B14\u00F7" . | |
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1045> "En port. grab. xil.: esc. con un rey y dos obispos" . | |
<http://datos.bne.es/resource/bima0000000003> <http://purl.org/dc/terms/language> <http://lexvo.org/id/iso639-3/spa> . |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment