Skip to content

Instantly share code, notes, and snippets.

@jweisman
Last active November 13, 2020 13:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jweisman/0ebeecc428273765caf9e4fc0c590a21 to your computer and use it in GitHub Desktop.
Save jweisman/0ebeecc428273765caf9e4fc0c590a21 to your computer and use it in GitHub Desktop.
Upload digital files programmatically using Alma APIs
99509041500561 logo.png
99509041400561 logo.png
99509041300561 logo.png
99509041200561 logo.png
99508941400561 logo.png
99509041100561 logo.png
99508841400561 logo.png
import os, requests, boto3, uuid, csv, sys
from requests.models import HTTPError
from multiprocessing import Pool
# Shared boto3 S3 resource handle used by upload_file.
S3 = boto3.resource('s3')

# Institution code; upload_file uses it as the first segment of every S3 key.
INST_CODE = 'TR_INTEGRATION_INST'

# Library code sent with each new representation (see add_rep).
LIBRARY_CODE = 'MAIN'

# Number of worker processes main() starts in its Pool.
THREADS = 3

# Base address that fix_url prepends to relative API paths.
URL = 'https://api-na.hosted.exlibrisgroup.com'

# Headers sent with every API request. The key is read from the ALMA_APIKEY
# environment variable when the module is loaded, so a missing variable
# raises KeyError before any work starts.
HEADERS = {
    'authorization': f"apikey {os.environ['ALMA_APIKEY']}",
    'accept': 'application/json',
    'content-type': 'application/json',
}
def fix_url(url):
    """Return an absolute URL for an API call.

    A value that already starts with ``https://`` is returned unchanged;
    anything else is treated as a path and appended to the ``URL`` base.
    """
    if url.startswith('https://'):
        return url
    return URL + url
def get(url):
    """Issue a GET request and return the decoded JSON body.

    ``url`` is passed through ``fix_url`` first, so it may be absolute or a
    path. ``handle_http_error`` inspects the response before the body is
    decoded, so an error status raises instead of returning.
    """
    target = fix_url(url)
    resp = requests.get(target, headers=HEADERS)
    handle_http_error(resp)
    return resp.json()
def post(url, body):
    """Issue a POST request with ``body`` as JSON and return the decoded reply.

    ``url`` is passed through ``fix_url`` first, so it may be absolute or a
    path. ``handle_http_error`` inspects the response before the body is
    decoded, so an error status raises instead of returning.
    """
    target = fix_url(url)
    resp = requests.post(target, json=body, headers=HEADERS)
    handle_http_error(resp)
    return resp.json()
def add_rep(mms_id):
    """Create a non-remote representation on the bib record ``mms_id``.

    The representation is assigned to ``LIBRARY_CODE`` with usage type
    ``PRESERVATION_MASTER``. Returns the decoded JSON reply from ``post``.
    """
    payload = {
        "is_remote": False,
        "library": {"value": LIBRARY_CODE},
        "usage_type": {"value": "PRESERVATION_MASTER"},
    }
    endpoint = f"/almaws/v1/bibs/{mms_id}/representations"
    return post(endpoint, payload)
def add_file(mms_id, rep_id, path):
    """Attach the file at ``path`` to representation ``rep_id`` of ``mms_id``.

    ``path`` is sent as the ``path`` field of the request body (the caller
    in this script passes the S3 key returned by ``upload_file``). Returns
    the decoded JSON reply from ``post``.
    """
    endpoint = f"/almaws/v1/bibs/{mms_id}/representations/{rep_id}/files"
    return post(endpoint, {"path": path})
def upload_file(path):
    """Upload a local file to the S3 bucket and return its object key.

    The key has the form
    ``{INST_CODE}/upload/migration/{uuid4}/{basename of path}``; a fresh
    UUID is generated on every call.

    Args:
        path: Path of the local file to upload.

    Returns:
        The S3 object key the file was stored under.

    Raises:
        OSError: If ``path`` cannot be opened for reading.
    """
    bucket = S3.Bucket('na-st01.ext.exlibrisgroup.com')
    key = f"{INST_CODE}/upload/migration/{uuid.uuid4()}/{os.path.basename(path)}"
    # Close the file handle as soon as the upload finishes or fails, so a
    # long run over many rows does not accumulate open descriptors.
    with open(path, 'rb') as body:
        bucket.put_object(Body=body, Key=key)
    return key
def handle_http_error(resp):
    """Raise if ``resp`` carries an HTTP error status; otherwise return None.

    For an error response the message is taken from the first entry of
    ``errorList.error`` in the JSON body. If the body is not JSON, or is
    JSON without that structure, the raw response text is used instead.

    Args:
        resp: A response object exposing ``raise_for_status()``.

    Raises:
        RuntimeError: With the extracted message, when one is available.
        HTTPError: The original error, when the response body is empty and
            there is no message to report.
    """
    try:
        resp.raise_for_status()
    except HTTPError as err:
        try:
            msg = err.response.json()['errorList']['error'][0]['errorMessage']
        except (ValueError, KeyError, IndexError, TypeError):
            # Not JSON, or JSON in an unexpected shape: fall back to the
            # plain body text (a string, so an empty body stays falsy).
            msg = err.response.text
        if msg:
            # Drop the chained HTTPError so the report shows only the
            # server's message.
            raise RuntimeError(msg) from None
        raise
def get_next_line():
    """Yield the rows of the CSV file named by the first command-line argument.

    Rows are produced lazily, one list of strings per CSV record, and the
    file is closed once the generator is exhausted.
    """
    csv_path = sys.argv[1]
    with open(csv_path, newline='') as handle:
        yield from csv.reader(handle)
def process_line(l):
    """Create a representation for one CSV row, upload its file and attach it.

    Args:
        l: One CSV row. ``l[0]`` is the MMS ID of the bib record and
            ``l[1]`` is the path of the local file to upload.

    Raises:
        ValueError: If the row has fewer than two columns.
    """
    print('Processing line:', l)
    # Validate before touching the API: otherwise a short row would create
    # a representation and only then fail on the missing file path,
    # leaving an empty representation behind.
    if len(l) < 2:
        raise ValueError(f"Expected an MMS ID and a file path, got: {l}")
    mms_id, path = l[0], l[1]
    rep = add_rep(mms_id)
    key = upload_file(path)
    print('Uploaded file', key)
    file = add_file(mms_id, rep["id"], key)
    print('Added file to rep', file['pid'])
def main():
    """Hand every CSV row to a pool of worker processes and wait for them.

    Each row is submitted asynchronously to ``process_line``; failures are
    reported through ``handle_error`` rather than stopping the run.
    """
    workers = Pool(processes=THREADS)
    for row in get_next_line():
        workers.apply_async(process_line, args=(row,), error_callback=handle_error)
    # No more submissions: let the queued rows drain, then block until done.
    workers.close()
    workers.join()
def handle_error(e):
    """Print the exception raised while processing a row.

    Passed as the ``error_callback`` for the jobs submitted in ``main``.
    """
    label = 'An error occurred:'
    print(label, e)
if __name__ == '__main__':
    # The CSV path is the only required command-line argument.
    if len(sys.argv) < 2:
        # sys.exit with a string writes it to stderr and exits with status
        # 1, so calling shells and schedulers can see that the run failed.
        # The bare exit() helper is injected by the site module (not always
        # present) and would have reported success.
        sys.exit("Missing CSV file")
    main()
@jweisman
Copy link
Author

Be sure your Alma API key is set in the ALMA_APIKEY environment variable. The script uses the default AWS credential profile; to use a different profile, set the AWS_PROFILE environment variable.

E.g.:

$ AWS_PROFILE=TR_INTEGRATION_INST python upload.py files.csv

See this blog post for more details.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment