Skip to content

Instantly share code, notes, and snippets.

@jermnelson
Last active August 29, 2015 14:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jermnelson/872c32d689bfbd6c0fec to your computer and use it in GitHub Desktop.
Save jermnelson/872c32d689bfbd6c0fec to your computer and use it in GitHub Desktop.
BIBCAT Prospector Ingestion Class
Prospector MARC21 Ingester

Prospector to BIBCAT

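This gist contains a script for ingesting Colorado Alliance of Research Libraries (CARL) Prospector MARC21 records into BIBCAT, together with the JSON description of the CARL member libraries that the script loads.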

{
"9arap": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "ALD"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9arap"
}
],
"bf:label": [
{
"@value": "Arapahoe Library District"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": null
}
],
"schema:latitude": [
{
"@value": 39.6088693
}
],
"schema:longitude": [
{
"@value": -104.9238345
}
]
}
],
"schema:url": [
{
"@id": "http://arapahoelibraries.org/"
}
]
},
"9arrp": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "APL"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9arrp"
}
],
"bf:label": [
{
"@value": "Aurora Public Library"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": null
}
],
"schema:latitude": [
{
"@value": null
}
],
"schema:longitude": [
{
"@value": null
}
]
}
],
"schema:url": [
{
"@id": "http://www.aurorapubliclibrary.org/"
}
]
},
"9aurp": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "AUR"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9aurp"
}
],
"bf:label": [
{
"@value": "Auraria"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": 1584
}
],
"schema:latitude": [
{
"@value": 39.7464912
}
],
"schema:longitude": [
{
"@value": -105.0015908
}
]
}
],
"schema:url": [
{
"@id": "http://library.auraria.edu/"
}
]
},
"9bblp": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "BLB"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9bblp"
}
],
"bf:label": [
{
"@value": "Boulder and Louisville, Broomfield"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": null
}
],
"schema:latitude": [
{
"@value": 39.978047
}
],
"schema:longitude": [
{
"@value": -105.1310009
}
]
}
],
"schema:url": [
{
"@id": "http://louisville-library.org/"
}
]
},
"9bldr": {
"@context": {
"bf": "http://bibframe.org/vocab/",
"rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
"schema": "http://schema.org/"
},
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "BPL"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9bldr"
}
],
"bf:label": [
{
"@value": "Boulder Public Library"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": 1632
}
],
"schema:latitude": [
{
"@value": 40.0138317
}
],
"schema:longitude": [
{
"@value": -105.281777
}
]
}
],
"schema:url": [
{
"@id": "https://boulderlibrary.org/"
}
]
},
"9cocp": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "CC"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9cocp"
}
],
"bf:label": [
{
"@value": "Colorado College"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": null
}
],
"schema:latitude": [
{
"@value": 38.8483815
}
],
"schema:longitude": [
{
"@value": -104.8236133
}
]
}
],
"schema:url": [
{
"@id": "https://www.coloradocollege.edu/"
}
]
},
"9csmp": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "CSM"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9csmp"
}
],
"bf:label": [
{
"@value": "Colorado School Mines"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": null
}
],
"schema:latitude": [
{
"@value": 39.7512711
}
],
"schema:longitude": [
{
"@value": -105.2231857
}
]
}
],
"schema:url": [
{
"@id": "http://www.mines.edu/Library"
}
]
},
"9cspp": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "CSU-Pueblo"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9cspp"
}
],
"bf:label": [
{
"@value": "Colorado State University-Pueblo"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": 1503
}
],
"schema:latitude": [
{
"@value": 38.308195
}
],
"schema:longitude": [
{
"@value": -104.5780057
}
]
}
],
"schema:url": [
{
"@id": "http://library.colostate-pueblo.edu/"
}
]
},
"9cspu": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "colorado publications"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9cspu"
}
],
"bf:label": [
{
"@value": "Colorado State Publications"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": null
}
],
"schema:latitude": [
{
"@value": 39.7402778
}
],
"schema:longitude": [
{
"@value": -104.9844444
}
]
}
],
"schema:url": [
{
"@id": "http://www.cde.state.co.us/stateinfo/"
}
]
},
"9csup": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "CSU"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9csup"
}
],
"bf:label": [
{
"@value": "Colorado State University"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": null
}
],
"schema:latitude": [
{
"@value": 40.5725458
}
],
"schema:longitude": [
{
"@value": -105.0843895
}
]
}
],
"schema:url": [
{
"@id": "http://lib.colostate.edu/"
}
]
},
"9cubp": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "CUB"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9cubp"
}
],
"bf:label": [
{
"@value": "University of Colorado-Boulder"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": 1645
}
],
"schema:latitude": [
{
"@value": 40.0090872
}
],
"schema:longitude": [
{
"@value": -105.2702586
}
]
}
],
"schema:url": [
{
"@id": "http://www.colorado.edu/"
}
]
},
"9culp": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "CU-L"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9culp"
}
],
"bf:label": [
{
"@value": "University of Colorado-Boulder Law"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": null
}
],
"schema:latitude": [
{
"@value": 40.0011532
}
],
"schema:longitude": [
{
"@value": -105.2625951
}
]
}
],
"schema:url": [
{
"@id": "http://lawlibrary.colorado.edu/"
}
]
},
"9dplp": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "DPL"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9dplp"
}
],
"bf:label": [
{
"@value": "Denver Public Library"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": null
}
],
"schema:latitude": [
{
"@value": 39.7372024
}
],
"schema:longitude": [
{
"@value": -104.9887046
}
]
}
],
"schema:url": [
{
"@id": "https://www.denverlibrary.org/"
}
]
},
"9dulp": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "DU-L"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9dulp"
}
],
"bf:label": [
{
"@value": "University of Denver Law"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": null
}
],
"schema:latitude": [
{
"@value": 39.6795567
}
],
"schema:longitude": [
{
"@value": -104.9608134
}
]
}
],
"schema:url": [
{
"@id": "http://www.law.du.edu/index.php/library"
}
]
},
"9dupp": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "DU"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9dupp"
}
],
"bf:label": [
{
"@value": "University of Denver"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": 1640
}
],
"schema:latitude": [
{
"@value": 39.6766541
}
],
"schema:longitude": [
{
"@value": -104.962203
}
]
}
],
"schema:url": [
{
"@id": "http://library.du.edu/"
}
]
},
"9fcpp": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "FCPL"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9fcpp"
}
],
"bf:label": [
{
"@value": "Fort Collins Public Library"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": null
}
],
"schema:latitude": [
{
"@value": 40.5852217
}
],
"schema:longitude": [
{
"@value": -105.072418
}
]
}
],
"schema:url": [
{
"@id": "http://www.poudrelibraries.org/"
}
]
},
"9ftlp": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "FTL"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9ftlp"
}
],
"bf:label": [
{
"@value": "John F Reed Library, Fort Lewis College"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": null
}
],
"schema:latitude": [
{
"@value": 37.2759009
}
],
"schema:longitude": [
{
"@value": -107.8673758
}
]
}
],
"schema:url": [
{
"@id": "https://library.fortlewis.edu/"
}
]
},
"9hscp": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "UCHSC"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9hscp"
}
],
"bf:label": [
{
"@value": "University of Colorado-Health Sciences"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": null
}
],
"schema:latitude": [
{
"@value": 39.7446494
}
],
"schema:longitude": [
{
"@value": -104.8386619
}
]
}
],
"schema:url": [
{
"@id": "http://hslibrary.ucdenver.edu/"
}
]
},
"9jcpp": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "JCPL"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9jcpp"
}
],
"bf:label": [
{
"@value": "Jeffco Public"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": null
}
],
"schema:latitude": [
{
"@value": 39.7065099
}
],
"schema:longitude": [
{
"@value": -105.0842395
}
]
}
],
"schema:url": [
{
"@id": "http://jefferson.lib.co.us/"
}
]
},
"9lgmt": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9lgmt"
}
],
"bf:label": [
{
"@value": "Longmont Public Library"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": 1518
}
],
"schema:latitude": [
{
"@value": 40.165199
}
],
"schema:longitude": [
{
"@value": -105.1005734
}
]
}
],
"schema:url": [
{
"@id": "http://www.longmontcolorado.gov/departments/departments-e-m/library"
}
]
},
"9lsvl": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "LSV"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9lsvl"
}
],
"bf:label": [
{
"@value": "Louisville Public Library"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": null
}
],
"schema:latitude": [
{
"@value": 39.978047
}
],
"schema:longitude": [
{
"@value": -105.1310009
}
]
}
],
"schema:url": [
{
"@id": "http://louisville-library.org/"
}
]
},
"9lvpl": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9lvpl"
}
],
"bf:label": [
{
"@value": "Loveland Public Library"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": null
}
],
"schema:latitude": [
{
"@value": 40.3959
}
],
"schema:longitude": [
{
"@value": -105.0725
}
]
}
],
"schema:url": [
{
"@id": "http://www.cityofloveland.org/index.aspx?page=154"
}
]
},
"9mdel": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "MDE"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9mdel"
}
],
"bf:label": [
{
"@value": "Broomfield Public Library"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": 1626
}
],
"schema:latitude": [
{
"@value": 39.9175555
}
],
"schema:longitude": [
{
"@value": -105.0688028
}
]
}
],
"schema:url": [
{
"@id": "http://ci.broomfield.co.us/index.aspx?NID=276"
}
]
},
"9mscp": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "MSC"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9mscp"
}
],
"bf:label": [
{
"@value": "Colorado Mesa University"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": null
}
],
"schema:latitude": [
{
"@value": 39.0786716
}
],
"schema:longitude": [
{
"@value": -108.553961
}
]
}
],
"schema:url": [
{
"@id": "http://www.coloradomesa.edu/cmulibrary/index.html"
}
]
},
"9rgsp": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "REG"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9rgsp"
}
],
"bf:label": [
{
"@value": "Regis University"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": 1639
}
],
"schema:latitude": [
{
"@value": 39.7905421
}
],
"schema:longitude": [
{
"@value": -105.0335937
}
]
}
],
"schema:url": [
{
"@id": "http://www.regis.edu/Academics/Library.aspx"
}
]
},
"9uccp": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "UCCS"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9uccp"
}
],
"bf:label": [
{
"@value": "University of Colorado-Colorado Springs"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": null
}
],
"schema:latitude": [
{
"@value": 38.8927531
}
],
"schema:longitude": [
{
"@value": -104.8009041
}
]
}
],
"schema:url": [
{
"@id": "http://www.uccs.edu/~library/"
}
]
},
"9uncp": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "UNC"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9uncp"
}
],
"bf:label": [
{
"@value": "University of Northern Colorado"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": null
}
],
"schema:latitude": [
{
"@value": 40.4012886
}
],
"schema:longitude": [
{
"@value": -104.716269
}
]
}
],
"schema:url": [
{
"@id": "http://www.unco.edu/library/"
}
]
},
"9uwyp": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "UWY"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "9uwyp"
}
],
"bf:label": [
{
"@value": "University of Wyoming"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": null
}
],
"schema:latitude": [
{
"@value": 41.3124897
}
],
"schema:longitude": [
{
"@value": -105.5796209
}
]
}
],
"schema:url": [
{
"@id": "http://www-lib.uwyo.edu/"
}
]
},
"codpl": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "DPL"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "codpl"
}
],
"bf:label": [
{
"@value": "Denver Public Library"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": null
}
],
"schema:latitude": [
{
"@value": 39.7377437
}
],
"schema:longitude": [
{
"@value": -104.9880309
}
]
}
],
"schema:url": [
{
"@id": "http://catalog.denverlibrary.org/"
}
]
},
"cr0zz": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "crl"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "cr0zz"
}
],
"bf:label": [
{
"@value": "Center for Research Libraries"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": null
}
],
"schema:latitude": [
{
"@value": null
}
],
"schema:longitude": [
{
"@value": null
}
]
}
],
"schema:url": [
{
"@id": "http://www.crl.edu/"
}
]
},
"pikes-peak": {
"bf:identifier": [
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "UCHSC"
},
{
"bf:identifierAssigner": "Colorado Alliance of Research Libraries",
"bf:identifierValue": "pikes-peak"
}
],
"bf:label": [
{
"@value": "Pikes Peak"
}
],
"rdf:type": "http://bibframe.org/vocab/Organization",
"schema:GeoCoordinates": [
{
"schema:elevation": [
{
"@value": 4303
}
],
"schema:latitude": [
{
"@value": 38.8403867
}
],
"schema:longitude": [
{
"@value": -105.0439711
}
]
}
],
"schema:url": [
{
"@id": "http://www.pikes-peak.com/"
}
]
}
}
"""Helper class for ingesting sample Prospector MARC records into BIBCAT
>> import prospector
"""
__author__ = "Jeremy Nelson"
__license__ = "GPLv3"
import argparse
import datetime
import json
import logging
import os
import pymarc
import random
import rdflib
import requests
import socket
import sys
sys.path.append("E:\\2015\\BIBFRAME-Datastore")
from semantic_server.repository import CONTEXT
from semantic_server.repository.resources.fedora import Resource
from semantic_server.repository.resources.fuseki import TripleStore
from semantic_server.repository.utilities.ingesters import default_graph
from semantic_server.repository.utilities.bibframe import Ingester, guess_search_doc_type
from semantic_server.repository.utilities.bibframe import BIBFRAMESearch
from semantic_server.repository.utilities.namespaces import BF, MADS, RDF, SCHEMA, XSD, FEDORA
# Passed to the ingesters below as their ``base_url``.
BASE_URL = 'http://prospector.coalliance.org'

# Directory holding this module, and its parent directory.
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
BASE_DIR = os.path.dirname(CURRENT_DIR)

# Mapping of CARL site code -> JSON-LD description of the library, read from
# the ``carl-orgs.json`` file that sits next to this module.
with open(os.path.join(CURRENT_DIR, 'carl-orgs.json'), 'r') as file_obj:
    CARL_LIBRARIES = json.load(file_obj)

# Connection settings handed to the triplestore, Fedora and search helpers.
CONFIG = {
    'FUSEKI': {'port': 3030, 'host': 'localhost', 'datastore': 'bf'},
    'FEDORA': {'port': 8080, 'host': 'localhost'},
    'ELASTICSEARCH': {'port': 9200, 'host': 'localhost'},
}

# Only errors are recorded, in <BASE_DIR>/logs/error.log.
logging.basicConfig(
    filename=os.path.join(BASE_DIR, 'logs', 'error.log'),
    level=logging.ERROR)

# Namespace declarations shared by every SPARQL template below.
PREFIX = """PREFIX bf: <{}>
PREFIX fedora: <{}>
PREFIX rdf: <{}>
""".format(BF, FEDORA, RDF)

# NOTE: the templates below are formatted twice -- once here with PREFIX and
# once more by their callers. That is why literal SPARQL braces are written
# as ``{{{{`` / ``}}}}`` and caller-supplied placeholders as ``{{}}``.

# Second-stage placeholders: LIMIT, OFFSET.
GET_SLICE_SUBJECTS_SPARQL = """{}
SELECT DISTINCT ?subject ?uuid
WHERE {{{{
?subject fedora:uuid ?uuid .
}}}} LIMIT {{}} OFFSET {{}}""".format(PREFIX)

# No second-stage placeholders; the doubled braces are kept as-is when this
# string is used directly as a query.
INSTANCE_SPARQL = """{}
SELECT DISTINCT ?instance
WHERE {{{{
?instance rdf:type bf:Instance .
}}}}""".format(PREFIX)

# Second-stage placeholder: the library identifier value to look up.
PROSPECTOR_LIBRARY_SPARQL = """{}
SELECT DISTINCT ?subject
WHERE {{{{
?subject bf:identifier ?code .
?code bf:identifierValue "{{}}"^^<http://www.w3.org/2001/XMLSchema#string> .
}}}}""".format(PREFIX)
def get_local(record):
    """Extract local holdings information from a record's 945 fields.

    Subfield ``a`` of each 945 field is expected to carry three
    whitespace-separated values: the library (site) code, the ILS bib
    number and the ILS item number. Any values after the third are ignored.
    Fields without a subfield ``a`` are skipped; fields whose subfield ``a``
    holds fewer than three values are logged and skipped instead of raising
    an IndexError.

    Args:
        record -- pymarc.MARC21 record

    Returns:
        list of dicts, one per usable 945 field, with the keys
        'site-code', 'ils-bib-number' and 'ils-item-number'
    """
    output = []
    for field in record.get_fields('945'):
        # A missing subfield may surface either as None or as a KeyError
        # depending on the field implementation; treat both the same way.
        try:
            subfield_a = field['a']
        except KeyError:
            subfield_a = None
        if subfield_a is None:
            continue
        # split() with no argument collapses runs of whitespace, so doubled
        # spaces between the values no longer shift them into the wrong keys.
        data = subfield_a.split()
        if len(data) < 3:
            logging.error(
                "Skipping malformed 945 subfield a: {!r}".format(subfield_a))
            continue
        output.append({'site-code': data[0],
                       'ils-bib-number': data[1],
                       'ils-item-number': data[2]})
    return output
def get_library(code):
    """Look up an already-stored library by one of its identifier values.

    Runs PROSPECTOR_LIBRARY_SPARQL against the configured triplestore.

    Args:
        code -- identifier value of the library (e.g. its CARL code)

    Returns:
        rdflib.URIRef of the first matching subject, or None when nothing
        matches or the triplestore answers with an HTTP error status
    """
    store = TripleStore(config=CONFIG)
    # Checks for existing Fedora URL
    query = PROSPECTOR_LIBRARY_SPARQL.format(code)
    payload = {"query": query, "output": "json"}
    response = requests.post(store.query_url, data=payload)
    if not response.status_code < 400:
        print("Error!! Code: {}\n{}".format(response.status_code,
                                            response.text))
        return None
    bindings = response.json().get('results').get('bindings')
    if len(bindings) > 0:
        first_match = bindings[0]
        return rdflib.URIRef(first_match['subject']['value'])
    return None
def add_or_get_prospector_library(code):
    """Return the stored BIBFRAME Organization for a library, adding it first
    if the triplestore does not know it yet.

    Note that the matching CARL_LIBRARIES entry is updated in place with a
    JSON-LD ``@context`` and an ``@type`` on each identifier before it is
    parsed and ingested.

    Args:
        code -- CARL code of the library; must be a key of CARL_LIBRARIES
                when the library is not stored yet

    Returns:
        rdflib.URIRef of the library (whatever get_library reports after
        ingestion, which may be None)
    """
    known_uri = get_library(code)
    if known_uri is not None:
        return known_uri
    graph = default_graph()
    library = CARL_LIBRARIES[code]
    library['@context'] = {'bf': str(BF),
                           'schema': str(SCHEMA),
                           'rdf': str(RDF)}
    # Identifiers need an explicit type; leave one alone if already present.
    for identifier in library['bf:identifier']:
        identifier.setdefault('@type', 'bf:Identifier')
    serialized = json.dumps(library)
    graph.parse(data=serialized, format='json-ld')
    ingester = Ingester(graph=graph, config=CONFIG, base_url=BASE_URL)
    ingester.ingest()
    ingester.__clean_up__()
    # Query again so the caller gets the URI the datastore assigned.
    return get_library(code)
def load_prospector_libraries():
    """Add every library in CARL_LIBRARIES to the datastore (skipping those
    already present) and print start/finish timing information."""
    began = datetime.datetime.utcnow()
    library_count = len(CARL_LIBRARIES.keys())
    print("Starting to load {} Prospector Libraries at {}".format(
        library_count, began))
    for carl_code in CARL_LIBRARIES.keys():
        add_or_get_prospector_library(carl_code)
    finished = datetime.datetime.utcnow()
    elapsed_minutes = (finished - began).seconds / 60.0
    print("Loaded Prospector Libraries, finished at {}, total time={} min".format(
        finished,
        elapsed_minutes))
def clean_up_001(record):
    """Remove spaces from the record's 001 field, otherwise XQuery Marc2BF
    will choke.

    Records without a 001 field, or whose 001 field has no data, are
    returned untouched instead of raising.

    Args:
        record -- MARC21 record

    Returns:
        the same record object, modified in place
    """
    # A missing field may surface either as None or as a KeyError depending
    # on the record implementation; treat both as "nothing to clean".
    try:
        field001 = record['001']
    except KeyError:
        field001 = None
    if field001 is not None and field001.data:
        field001.data = field001.data.replace(" ", "")
    return record
def process_marc_records(filepath):
    """Iterate through a file of MARC records and run the ingester on each
    record, printing progress markers and a final summary.

    The file is closed when processing ends (normally or through an
    exception). The summary reports the number of records actually
    processed and the average number of records per minute; an empty file
    or a sub-second run no longer raises.

    Args:
        filepath -- Filepath to Prospector MARC Records
    """
    start = datetime.datetime.utcnow()
    print("Started processing {} at {}".format(filepath, start))
    total = 0
    with open(filepath, 'rb') as marc_file:
        marc_reader = pymarc.MARCReader(marc_file, to_unicode=True)
        for i, rec in enumerate(marc_reader):
            ingester = RecordIngester(
                config=CONFIG,
                record=rec,
                base_url=BASE_URL)
            ingester.ingest()
            total = i + 1
            # Progress: a dot every 10 records, the index every 100.
            if not i % 10 and i > 0:
                print(".", end='')
            if not i % 100:
                print(i, end='')
    end = datetime.datetime.utcnow()
    # total_seconds() keeps fractions of a second and whole days, both of
    # which timedelta.seconds would drop.
    elapsed_minutes = (end - start).total_seconds() / 60.0
    if elapsed_minutes > 0:
        average_per_minute = total / elapsed_minutes
    else:
        average_per_minute = 0.0
    print("Finished at {} total={}, total time={}, average per min={}".format(
        end, total, elapsed_minutes, average_per_minute))
def reindex_datastore(limit=1000, offset=0):
    """Re-index one slice of the datastore's subjects into the search index.

    Fetches up to ``limit`` subjects (starting at ``offset``) that have a
    fedora:uuid from the Fuseki endpoint described by CONFIG['FUSEKI'],
    loads each subject's graph from its URL and indexes it. A subject that
    fails to load or to index is logged and skipped; KeyboardInterrupt and
    SystemExit are no longer swallowed, so a long run can be interrupted.

    Args:
        limit -- maximum number of subjects to process, default 1000
        offset -- offset of the first subject in the query, default 0
    """
    start = datetime.datetime.utcnow()
    print("Starting Elasticsearch Reindex of datastore at {}".format(
        start.isoformat()))
    fuseki = CONFIG['FUSEKI']
    query_url = "http://{}:{}/{}/query".format(
        fuseki['host'], fuseki['port'], fuseki['datastore'])
    result = requests.post(
        query_url,
        data={"query": GET_SLICE_SUBJECTS_SPARQL.format(limit, offset),
              "output": "json"})
    if result.status_code < 400:
        results = result.json().get('results')
        for i, row in enumerate(results['bindings']):
            fedora_url = row['subject']['value']
            fedora_uri = rdflib.URIRef(fedora_url)
            graph = default_graph()
            try:
                graph.parse(fedora_url)
            except Exception:
                print("E={}".format(
                    i), end='')
                logging.error("{} Error {} with url={}".format(
                    datetime.datetime.utcnow().isoformat(),
                    sys.exc_info(),
                    fedora_url))
                continue
            search = BIBFRAMESearch(
                config=CONFIG)
            doc_type = guess_search_doc_type(graph, fedora_uri)
            try:
                search.__index__(fedora_uri,
                                 graph,
                                 doc_type,
                                 'bibframe',
                                 'bf')
            except Exception:
                print("E={}".format(
                    i), end='')
                logging.error("{} Error indexing {}\n{}".format(
                    datetime.datetime.utcnow().isoformat(),
                    fedora_url,
                    sys.exc_info()))
            # Progress: a dot every 10 subjects, the index every 100 and
            # the elapsed seconds every 1000.
            if not i % 10 and i > 0:
                print(".", end="")
            if not i % 100:
                print(i, end="")
            if not i % 1000 and i > 0:
                now = datetime.datetime.utcnow()
                print(" {} secs ".format(
                    int((now - start).total_seconds())), end='')
    else:
        print("Error with fusueki\n{}".format(result.text))
    end = datetime.datetime.utcnow()
    print("""Finished redindexing of {} subjects offset={} at {}
total mins={}""".format(limit,
                        offset,
                        end.isoformat(),
                        (end - start).total_seconds() / 60.0))
def xquery_chain(marc):
    """Convert a MARC21 record to a BIBFRAME graph through the XQuery socket
    server.

    Function assumes that an instance of
    <https://github.com/jermnelson/bibframe-socket> is running on 8089
    socket. The record is sent as MARC XML followed by a newline, and the
    response is read until the server closes the connection. The socket is
    closed even when connecting, sending or receiving fails.

    Args:
        marc -- MARC21

    Returns:
        bf_graph -- BIBFRAME Graph
    """
    # Quick 001 hack
    marc_xml = pymarc.record_to_xml(clean_up_001(marc), namespace=True)
    chunks = []
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as xquery_server:
        xquery_server.connect(('localhost', 8089))
        xquery_server.sendall(marc_xml + b"\n")
        while True:
            data = xquery_server.recv(1024)
            if not data:
                # An empty read means the server closed the connection.
                break
            chunks.append(data)
    # Join once at the end instead of growing a bytes object per chunk.
    rdf_xml = b''.join(chunks)
    bf_graph = rdflib.Graph()
    bf_graph.parse(data=rdf_xml, format='xml')
    return bf_graph
class RecordIngester(Ingester):
    """Class extends basic BIBFRAME ingester to add Prospector HeldItems based
    on 945 field in a Prospector MARC record"""

    @staticmethod
    def __sample_status__():
        """Return a randomly chosen sample circulation status.

        Declared as a static method because it takes no instance; it can be
        called on the class or on an instance.

        Returns:
            rdflib.Literal -- "AVAILABLE" (about 50% of calls),
            "CHECKED-OUT" (25%), "LOST" (10%), "IN-TRANSIT" (10%) or
            "REPLACED" (5%)
        """
        # Set sample circulation status
        rand_val = random.random()
        if rand_val <= .5:
            circ_status = rdflib.Literal("AVAILABLE")
        elif rand_val <= .75:
            circ_status = rdflib.Literal("CHECKED-OUT")
        elif rand_val <= .85:
            circ_status = rdflib.Literal("LOST")
        elif rand_val <= .95:
            circ_status = rdflib.Literal("IN-TRANSIT")
        else:
            circ_status = rdflib.Literal("REPLACED")
        return circ_status

    def __init__(self, **kwargs):
        """Initializes an instance of `RecordIngester`_

        The MARC record is converted to a BIBFRAME graph with xquery_chain
        and that graph is passed on to the parent ingester as ``graph``.

        Keyword args:
            record -- MARC21 record
        """
        self.record = kwargs.get('record', None)
        self.graph = xquery_chain(self.record)
        kwargs['graph'] = self.graph
        super(RecordIngester, self).__init__(**kwargs)

    def __process_subject__(self, row):
        """Process a subject with the parent ingester, then add cover art
        when the subject is typed as a bf:Instance.

        Args:
            row -- indexable pair; row[0] is used as the subject and row[1]
                   as the graph queried for its rdf:type values
        """
        super(RecordIngester, self).__process_subject__(row)
        for obj_ in row[1].objects(subject=row[0], predicate=RDF.type):
            if obj_ == BF.Instance:
                # __add_cover_art__ is not defined in this class;
                # presumably inherited from Ingester -- TODO confirm.
                self.__add_cover_art__(row)

    def __add_held_items__(self, **kwargs):
        """Internal method adds one or more held items to the instance
        based on 945 field in the MARC record

        Keyword args:
            instance -- rdflib.URIRef of Instance
            sample-status -- If True, sets random Circ status to HeldItem,
                             default is True
        """
        instance = kwargs.get('instance')
        set_sample_status = kwargs.get('sample-status', True)
        local_holdings = get_local(self.record)
        for i, local in enumerate(local_holdings):
            held_item_graph = default_graph()
            # Subject URI used inside this small graph only; the random
            # component is not unique. Presumably Resource.__create__
            # assigns the stored URI -- TODO confirm.
            held_item = rdflib.URIRef("{}instance{}heldItem{}".format(
                self.base_url, random.randint(0, 10), i))
            held_item_graph.add((held_item, RDF.type, BF.HeldItem))
            held_item_graph.add((held_item, BF.holdingFor, instance))
            prospector_library = add_or_get_prospector_library(
                local['site-code'])
            held_item_graph.add(
                (held_item,
                 BF.heldBy,
                 prospector_library))
            held_item_graph.add(
                (held_item,
                 BF.itemId,
                 rdflib.Literal(local['ils-item-number'])))
            if set_sample_status:
                circ_status = RecordIngester.__sample_status__()
                held_item_graph.add(
                    (held_item, BF.circulationStatus, circ_status))
            resource = Resource(self.config, self.searcher)
            resource.__create__(rdf=held_item_graph)

    def __clean_up__(self):
        """Internal method adds bf:HeldItems and bf:CoverArt after finishing
        ingestion"""
        super(RecordIngester, self).__clean_up__()
        query = self.graph.query(INSTANCE_SPARQL)
        for row in query:
            local_uri = row[0]
            # Resolve the local instance URI to its URL in the datastore;
            # held items are only attached when that lookup succeeds.
            fedora_url = self.searcher.triplestore.__sameAs__(str(local_uri))
            if fedora_url:
                self.__add_held_items__(instance=rdflib.URIRef(fedora_url))
            else:
                print("In prospector clean-up, instance {} not found".format(
                    local_uri))
# Command-line entry point: ingest the MARC21 file named by the single
# positional argument.
if __name__ == '__main__':
    cli = argparse.ArgumentParser(description='Prospector Ingestion Script')
    cli.add_argument('marc_filepath', help='MARC21 file path')
    process_marc_records(cli.parse_args().marc_filepath)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment