Created
August 1, 2012 08:29
-
-
Save epoz/3225004 to your computer and use it in GitHub Desktop.
Converting BNE Bibliography ntriples to BibJSON
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import hashlib
import httplib
import json
import sys
from datetime import datetime

import ntriples
# Host and port handed to httplib.HTTPConnection for every request.
# NOTE(review): presumably an Elasticsearch node (9200 is its default HTTP
# port) -- confirm before pointing this elsewhere.
ES_URL = "localhost:9200"

# Path prefix under which 'record/<id>' and 'collection/<id>' are PUT.
ES_PATH = "/bibserver/"

# Maps an RDF predicate URI to the name of the record field its object
# values are collected under.  Predicates that are not listed here fall
# into the catch-all 'misc' field (see ESSink.triple).  Two predicates
# (P1040 and P1045) deliberately share the 'note' field.
field_mapping = {
    'http://iflastandards.info/ns/isbd/elements/P1004': 'title',
    'http://iflastandards.info/ns/isbd/elements/P1185': 'format',
    'http://iflastandards.info/ns/isbd/elements/P1007': 'contributor',
    'http://iflastandards.info/ns/isbd/elements/P1016': 'place',
    'http://iflastandards.info/ns/isbd/elements/P1117': 'related',
    'http://iflastandards.info/ns/isbd/elements/P1018': 'date',
    'http://iflastandards.info/ns/isbd/elements/P1022': 'extent',
    'http://iflastandards.info/ns/isbd/elements/P1024': 'dimension',
    'http://iflastandards.info/ns/isbd/elements/P1040': 'note',
    'http://iflastandards.info/ns/isbd/elements/P1045': 'note',
    'http://purl.org/dc/terms/language': 'language',
}
def post_data(data):
    """PUT one record to the index and return the HTTP status code.

    ``data`` is the record dict.  It must already contain an ``'_id'`` key,
    whose value becomes the last segment of the request path
    (``ES_PATH + 'record/' + _id``).  The dict is modified in place:
    ``collection``, ``owner``, ``_created`` and ``_last_modified`` are
    (over)written before the dict is serialised as the JSON request body.

    A response status of 400 or above is reported on stderr but does not
    raise, so one rejected record does not abort the import.  Connection
    errors raised by httplib still propagate to the caller.
    """
    # Take the timestamp once so the two fields can never straddle a
    # second boundary and disagree.
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    data['collection'] = 'ES'
    data['owner'] = 'test'
    data['_created'] = stamp
    data['_last_modified'] = stamp

    conn = httplib.HTTPConnection(ES_URL)
    try:
        conn.request('PUT', ES_PATH + 'record/' + data['_id'], json.dumps(data))
        # Read the reply so the request is known to have completed, then
        # close: this function runs once per record, and an unclosed
        # connection each time would leak one socket per record.
        response = conn.getresponse()
        response.read()
        status = response.status
        reason = response.reason
    finally:
        conn.close()

    if status >= 400:
        sys.stderr.write('PUT of record %s failed: HTTP %d %s\n'
                         % (data['_id'], status, reason))
    return status
class ESSink(object):
    """Sink that groups consecutive triples by subject into one record.

    Presumably ``triple`` is called by ``ntriples.NTriplesParser`` once per
    parsed statement (the instance is passed to it as ``sink=``).  Object
    values are collected per destination field until the subject changes,
    at which point the finished record is handed to ``post_data``.

    The record for the *last* subject stays in ``self.data`` when the input
    ends; whoever drives the parser has to post it.

    Record ids are the MD5 hex digest of the subject, so they are stable
    across runs.  Triples of one subject must therefore be contiguous in
    the input: a second, separate run of the same subject produces the
    same id and replaces the earlier record.
    """

    def __init__(self, profile=False):
        # ``profile`` is accepted for interface compatibility; it is not
        # used anywhere in this class.
        self.data = {}
        self.last_id_seen = None

    def triple(self, s, p, o):
        """Add the statement (s, p, o) to the record for subject ``s``.

        A change of subject first posts the record built so far.  Triples
        whose subject is a blank node (``ntriples.bNode``) are dropped and
        never produce a record.
        """
        is_blank = isinstance(s, ntriples.bNode)

        if s != self.last_id_seen:
            if self.data:
                post_data(self.data)
            self.last_id_seen = s
            if is_blank:
                # Leave the buffer empty so no '_id'-only record is posted
                # when the next subject arrives.
                self.data = {}
            else:
                # Hash the subject text rather than using id(s): id() is a
                # memory address that is reused once an object is freed,
                # so unrelated subjects could end up with the same '_id'.
                raw = s if isinstance(s, bytes) else s.encode('utf-8')
                self.data = {'_id': hashlib.md5(raw).hexdigest()}

        if is_blank:
            return

        dest_field = field_mapping.get(p, 'misc')
        self.data.setdefault(dest_field, []).append(o)
# Make the Collection object that the imported records belong to
# (post_data stamps every record with the same 'collection' and 'owner').
stamp = datetime.now().strftime("%Y%m%d%H%M%S")
collection = {
    "owner": "test",
    "_created": stamp,
    "_last_modified": stamp,
    "_id": "04c19fa1e772ab66f0aad2efe61f25cd",
    "collection": "ES",
    "label": "Spanish National Library",
}
c = httplib.HTTPConnection(ES_URL)
try:
    c.request('PUT', ES_PATH + 'collection/' + collection['_id'],
              json.dumps(collection))
    # Drain the reply so the request has completed before the socket is
    # released.
    c.getresponse().read()
finally:
    c.close()

# Stream the N-Triples file named on the command line through the sink.
es_sink = ESSink()
parser = ntriples.NTriplesParser(sink=es_sink)
with open(sys.argv[1]) as triples_file:
    sink = parser.parse(triples_file)

# The sink only posts a record when the subject changes, so the record for
# the final subject is still buffered once parsing ends; post it here.
if es_sink.data:
    post_data(es_sink.data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<http://datos.bne.es/resource/bima0000000003> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://iflastandards.info/ns/fr/frbr/frbrer/C1003> .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1004> "A don Gaspar de Guzman, conde de Olivares, duque de Sanlucar la Mayor ..." .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1185> "[Texto impreso]" .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1007> "por Sevilla don Diego Hurtado de Mendoza, Cavallero de la Orden de Santiago, Vizconde de la Corzana ..." .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1016> "[Sevilla] Impresso por mandado de su Se\u00F1oria en Sevill" .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1117> "por Francisco de Lyra" .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1018> "1631" .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1022> "4 h." .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1024> "Fol." .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1040> "Sign.: A\u00B14\u00F7" .
<http://datos.bne.es/resource/bima0000000003> <http://iflastandards.info/ns/isbd/elements/P1045> "En port. grab. xil.: esc. con un rey y dos obispos" .
<http://datos.bne.es/resource/bima0000000003> <http://purl.org/dc/terms/language> <http://lexvo.org/id/iso639-3/spa> .
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment