Skip to content

Instantly share code, notes, and snippets.

@jermnelson
Created July 13, 2022 17:29
Show Gist options
  • Save jermnelson/bf8e78fdb9675cd877686cd26d253c5a to your computer and use it in GitHub Desktop.
Save jermnelson/bf8e78fdb9675cd877686cd26d253c5a to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "4bdc247c-0d14-4606-81f8-4823313b8f66",
"metadata": {},
"source": [
"# SRS Record Streaming\n",
"\n",
"[API Documentation](https://s3.amazonaws.com/foliodocs/api/mod-source-record-storage/p/source-record-storage-stream.html)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "d424ab3c-3ccb-49e8-8841-50a4dad4e6d8",
"metadata": {},
"outputs": [],
"source": [
"import datetime\n",
"import getpass\n",
"import json\n",
"\n",
"import requests\n",
"from folioclient import FolioClient"
]
},
{
"cell_type": "code",
"execution_count": 81,
"id": "5905d9e1-6c17-4919-b7e4-39066e682720",
"metadata": {},
"outputs": [],
"source": [
"# Connect to the Stanford dev Okapi gateway as the sul_admin user of the sul tenant.\n",
"dev_client = FolioClient(\n",
"    \"https://okapi-dev.stanford.edu\",\n",
"    \"sul\",\n",
"    \"sul_admin\",\n",
"    # Prompt at run time so a real password never has to be typed into (and saved with) the notebook.\n",
"    getpass.getpass(\"FOLIO password for sul_admin (ask us for it): \"),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "742b0895-f93a-49c2-8972-08dd8748736b",
"metadata": {},
"outputs": [],
"source": [
"def record_stream(url, client):\n",
"    \"\"\"Stream newline-delimited JSON records from ``url`` into a list.\n",
"\n",
"    Prints a start and a finish timestamp (UTC) plus the elapsed time in minutes.\n",
"\n",
"    Args:\n",
"        url: Full URL of the streaming endpoint, including any query string.\n",
"        client: Object whose ``okapi_headers`` attribute holds the request headers.\n",
"\n",
"    Returns:\n",
"        list: One parsed JSON value per non-empty line of the response body.\n",
"\n",
"    Raises:\n",
"        requests.HTTPError: If the server answers with a 4xx/5xx status.\n",
"        json.JSONDecodeError: If a non-empty line is not valid JSON.\n",
"    \"\"\"\n",
"    records = []\n",
"    start = datetime.datetime.utcnow()\n",
"    print(f\"Started stream of MARC JSON {start}\")\n",
"    # Context managers close the session and response even if parsing fails midway.\n",
"    with requests.Session() as session:\n",
"        with session.get(url, headers=client.okapi_headers, stream=True) as response:\n",
"            # Fail fast instead of trying to parse an error body as records.\n",
"            response.raise_for_status()\n",
"            for line in response.iter_lines():\n",
"                # iter_lines() can yield empty keep-alive lines; json.loads would raise on them.\n",
"                if line:\n",
"                    records.append(json.loads(line))\n",
"    end = datetime.datetime.utcnow()\n",
"    # total_seconds() keeps the sub-second and whole-day parts that .seconds drops.\n",
"    print(f\"Finished stream of MARC JSON {end}, total time {(end-start).total_seconds() / 60.}\")\n",
"    return records"
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "846abcb3-b37b-4147-bdad-c3930299dc3e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Started stream of MARC JSON 2022-07-12 20:54:57.507175\n",
"Finished stream of MARC JSON 2022-07-12 20:56:56.378246, total time 1.9666666666666666\n"
]
}
],
"source": [
"# Fetch source records from the SRS streaming endpoint; the query string is passed through unchanged.\n",
"first_recs = record_stream(\n",
"    f\"{dev_client.okapi_url}/source-storage/stream/source-records\"\n",
"    \"?updatedAfter=2022-07-01&limit=100000&suppressFromDiscovery=False\",\n",
"    dev_client,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "8d608278-b793-4d55-8c62-204bf5d13695",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"100000"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(first_recs)"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "19dd6267-3434-42d4-a255-035537de3618",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"{'recordId': '0000da5b-370d-5f2d-aa78-f6fb544038b6',\n",
" 'snapshotId': 'a044ccbc-76dd-42fb-9ad8-9d9404cb88be',\n",
" 'recordType': 'MARC_BIB',\n",
" 'parsedRecord': {'id': '0000da5b-370d-5f2d-aa78-f6fb544038b6',\n",
" 'content': {'fields': [{'001': 'a10451893'},\n",
" {'003': 'SIRSI'},\n",
" {'005': '20160716013742.0'},\n",
" {'007': 'a| |||||'},\n",
" {'007': 'aj|canzn'},\n",
" {'008': '140415s1910 cau|||||| a || | ||eng|d'},\n",
" {'040': {'ind1': ' ',\n",
" 'ind2': ' ',\n",
" 'subfields': [{'a': 'CSt'},\n",
" {'b': 'eng'},\n",
" {'c': 'CSt'},\n",
" {'d': 'UtOrBLW'}]}},\n",
" {'050': {'ind1': ' ',\n",
" 'ind2': '4',\n",
" 'subfields': [{'a': 'G4364.R6 G46 [1910] .N4'}]}},\n",
" {'100': {'ind1': '1',\n",
" 'ind2': ' ',\n",
" 'subfields': [{'a': 'New Richmond Land Company.'}]}},\n",
" {'245': {'ind1': '1',\n",
" 'ind2': '0',\n",
" 'subfields': [{'a': \"Map of Richmond, California. New Richmond Land Company, 801-803 Monadnock Building, San Francisco. Wall's Harbor Center Tract, The Value Center. Bolte & Braden Company, San Francisco\"},\n",
" {'h': '[cartographic material].'}]}},\n",
" {'255': {'ind1': ' ',\n",
" 'ind2': ' ',\n",
" 'subfields': [{'a': 'Scale 1:19,200.'}]}},\n",
" {'264': {'ind1': ' ',\n",
" 'ind2': '1',\n",
" 'subfields': [{'a': 'San Francisco :'},\n",
" {'b': 'Bolte & Braden Company,'},\n",
" {'c': '[1910]'}]}},\n",
" {'300': {'ind1': ' ',\n",
" 'ind2': ' ',\n",
" 'subfields': [{'a': '1 map ;'}, {'c': '43.5 x 62 cm'}]}},\n",
" {'336': {'ind1': ' ',\n",
" 'ind2': ' ',\n",
" 'subfields': [{'a': 'cartographic image'}, {'2': 'rdacontent'}]}},\n",
" {'337': {'ind1': ' ',\n",
" 'ind2': ' ',\n",
" 'subfields': [{'a': 'unmediated'}, {'2': 'rdamedia'}]}},\n",
" {'338': {'ind1': ' ',\n",
" 'ind2': ' ',\n",
" 'subfields': [{'a': 'sheet'}, {'2': 'rdacarrier'}]}},\n",
" {'500': {'ind1': ' ',\n",
" 'ind2': ' ',\n",
" 'subfields': [{'a': \"A real estate development map. Date is estimated. Map shows the Wall's Harbor Center Tract in red, and the City Hall Site and Civic Center in green.\"}]}},\n",
" {'500': {'ind1': ' ', 'ind2': ' ', 'subfields': [{'a': 'Separate Map.'}]}},\n",
" {'650': {'ind1': ' ',\n",
" 'ind2': '0',\n",
" 'subfields': [{'a': 'Real estate development'},\n",
" {'z': 'California'},\n",
" {'z': 'Richmond'},\n",
" {'x': 'Maps'},\n",
" {'y': '20th century.'}]}},\n",
" {'650': {'ind1': ' ',\n",
" 'ind2': '0',\n",
" 'subfields': [{'a': 'City blocks'},\n",
" {'z': 'California'},\n",
" {'z': 'Richmond'},\n",
" {'x': 'Maps'},\n",
" {'y': '20th century.'}]}},\n",
" {'651': {'ind1': ' ',\n",
" 'ind2': '0',\n",
" 'subfields': [{'a': 'California'},\n",
" {'v': 'Maps'},\n",
" {'y': '20th century.'}]}},\n",
" {'651': {'ind1': ' ',\n",
" 'ind2': '0',\n",
" 'subfields': [{'a': 'Richmond (Calif.)'},\n",
" {'x': 'Maps'},\n",
" {'y': '20th century.'}]}},\n",
" {'655': {'ind1': ' ',\n",
" 'ind2': '7',\n",
" 'subfields': [{'a': 'Maps'}, {'2': 'lcgft'}]}},\n",
" {'655': {'ind1': ' ',\n",
" 'ind2': '7',\n",
" 'subfields': [{'a': 'Cadastral maps.'}, {'2': 'lcgft'}]}},\n",
" {'590': {'ind1': ' ',\n",
" 'ind2': ' ',\n",
" 'subfields': [{'a': 'Pub list no.: 5312.000.'}]}},\n",
" {'690': {'ind1': ' ',\n",
" 'ind2': '4',\n",
" 'subfields': [{'a': 'The David Rumsey Map Collection.'}]}},\n",
" {'856': {'ind1': '4',\n",
" 'ind2': '0',\n",
" 'subfields': [{'u': 'http://www.davidrumsey.com/luna/servlet/view/search?q=pub_list_no=5312.000%20LIMIT:RUMSEY~8~1&sort=Pub_List_No_InitialSort,Pub_Date,Pub_List_No,Series_No'}]}},\n",
" {'856': {'ind1': '4',\n",
" 'ind2': '1',\n",
" 'subfields': [{'u': 'http://purl.stanford.edu/yv535wk5017'},\n",
" {'x': 'SDR-PURL'},\n",
" {'x': 'item'},\n",
" {'x': 'file:yv535wk5017%2F5312000.jp2'},\n",
" {'x': 'collection:xh235dd9059::David Rumsey Map Collection at Stanford University Libraries'}]}},\n",
" {'035': {'ind1': ' ',\n",
" 'ind2': ' ',\n",
" 'subfields': [{'a': '(OCoLC-M)953571215'}]}},\n",
" {'596': {'ind1': ' ', 'ind2': ' ', 'subfields': [{'a': '13'}]}},\n",
" {'918': {'ind1': ' ', 'ind2': ' ', 'subfields': [{'a': '10451893'}]}},\n",
" {'999': {'ind1': 'f',\n",
" 'ind2': 'f',\n",
" 'subfields': [{'i': 'b6919f84-f7f3-5957-84d6-fcf603aaacbb'},\n",
" {'s': '0000da5b-370d-5f2d-aa78-f6fb544038b6'}]}}],\n",
" 'leader': '01834nem a2200397uu 4500'}},\n",
" 'deleted': False,\n",
" 'externalIdsHolder': {'instanceId': 'b6919f84-f7f3-5957-84d6-fcf603aaacbb',\n",
" 'instanceHrid': 'a10451893'},\n",
" 'additionalInfo': {'suppressDiscovery': False},\n",
" 'metadata': {'createdDate': 1657579301983,\n",
" 'createdByUserId': 'd6507bbe-e985-4baa-beb4-d7ca4d6c279f',\n",
" 'updatedDate': 1657579303250,\n",
" 'updatedByUserId': 'd6507bbe-e985-4baa-beb4-d7ca4d6c279f'}}"
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"first_recs[0]"
]
},
{
"cell_type": "code",
"execution_count": 74,
"id": "8a992fad-e7b1-488b-a636-b15528d1a838",
"metadata": {},
"outputs": [],
"source": [
"# Persist the fetched records as a single JSON array.\n",
"# NOTE(review): the file name says 10k, but the fetch cell requests limit=100000 - confirm the intended name.\n",
"# Plain \"w\" is enough here: the handle is only written to, never read back.\n",
"with open(\"folio-srs-10k.json\", \"w\") as fo:\n",
"    json.dump(first_recs, fo)"
]
},
{
"cell_type": "code",
"execution_count": 75,
"id": "4ed2d027-b6f7-4474-aa9b-f41180753804",
"metadata": {},
"outputs": [],
"source": [
"from pymarc import parse_json_to_array"
]
},
{
"cell_type": "code",
"execution_count": 82,
"id": "681fd0ce-29c3-4605-8477-57737c2cf7dc",
"metadata": {},
"outputs": [],
"source": [
"rtac_url = f\"{dev_client.okapi_url}/rtac-batch\""
]
},
{
"cell_type": "code",
"execution_count": 85,
"id": "e4c01205-fef1-488a-bd61-6fcf1f020866",
"metadata": {},
"outputs": [],
"source": [
"# Ask the RTAC batch endpoint for holdings/availability of one instance.\n",
"# fullPeriodicals is sent as a real JSON boolean; the string 'false' only works if the server coerces it.\n",
"result = requests.post(\n",
"    rtac_url,\n",
"    headers=dev_client.okapi_headers,\n",
"    json={\n",
"        \"instanceIds\": ['b6919f84-f7f3-5957-84d6-fcf603aaacbb'],\n",
"        \"fullPeriodicals\": False,\n",
"    },\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 86,
"id": "fe6ff772-e6f5-4713-9159-416af159e2bc",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"200"
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result.status_code"
]
},
{
"cell_type": "code",
"execution_count": 87,
"id": "4eb5d26f-8010-4478-80a5-47d8deb9d99b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'holdings': [{'instanceId': 'b6919f84-f7f3-5957-84d6-fcf603aaacbb',\n",
" 'holdings': [{'id': 'e90af420-7ed2-54ba-b297-0c871d676b7f',\n",
" 'location': 'Map Center (W7 large box)',\n",
" 'callNumber': 'G4364.R6 G46 [1910] .N4',\n",
" 'status': 'Available',\n",
" 'permanentLoanType': 'Can circulate'}]}]}"
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result.json()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c8296473-6937-4102-96b5-41cf1e05a1b3",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment