Last active
February 25, 2021 22:49
-
-
Save hectorcorrea/e1e40a312ce93c1ac5d2ec0be5502373 to your computer and use it in GitHub Desktop.
Quick and dirty script to produce POD files for PUL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
#
# This script processes our Alma MARC XML full dump files
# and uploads the resulting data to the POD project.
#
# Notice that we don't publish all records and/or all the data.
#
# This script expects the `marcli` utility to be available on the PATH
# (https://github.com/hectorcorrea/marcli/releases)
#
# Settings. The source directory and the POD API key may be supplied through
# the environment (ALMA_DIR, POD_API_KEY); otherwise the defaults below apply.
ALMA_DIR="${ALMA_DIR:-/Users/correah/data/fulldump_expanded}"
POD_DIR="$ALMA_DIR/pod_files"
API_KEY="${POD_API_KEY:-get-your-key-from-pod}"
# Quoted so that a directory name containing spaces is not word-split.
mkdir -p "$POD_DIR"
# The glob must stay outside the quotes, otherwise it is never expanded.
# rm -f "$POD_DIR"/pod_*.gz
# Creates the new MARC files for POD from the original files from Alma
for FILE in "$ALMA_DIR"/*.xml
do
  # When nothing matches, bash leaves the glob as a literal pattern; skip it
  # instead of handing a non-existent "*.xml" file to marcli.
  [ -e "$FILE" ] || continue
  BASE_NAME=$(basename "$FILE")
  POD_FILE="$POD_DIR/pod_$BASE_NAME"
  echo "Processing $BASE_NAME"
  # Export
  #   Only records with a 035 field
  #   That have the word "OCoLC" (somewhere on the record)
  #   Exclude field 583 (private notes for RBSC items) from the output
  #   Output as XML (nicely formatted via debug=true)
  if marcli -file="$FILE" -hasFields=035 -match=OCoLC -exclude=583 -format=xml -debug=true > "$POD_FILE"
  then
    gzip -f "$POD_FILE"
  else
    # Do not compress a partial export: the upload step picks up every *.gz.
    echo "marcli failed for $FILE; not compressing $POD_FILE" >&2
  fi
  # For testing purposes just process one file
  break
done
# Uploads the files to POD
# https://pod.stanford.edu/api
MIME_TYPE="application/marcxml+xml"
# Number of files for which curl itself reported a failure.
UPLOAD_FAILURES=0
for FILE in "$POD_DIR"/*.gz
do
  # When nothing matches, bash leaves the glob as a literal pattern; skip it
  # instead of asking curl to upload a non-existent "*.gz" file.
  [ -e "$FILE" ] || continue
  echo "Uploading $FILE"
  # NOTE: without --fail, curl exits 0 on HTTP error responses, so this check
  # only catches transport-level problems (unreadable file, no connection...).
  if ! curl -F "upload[files][]=@$FILE;type=$MIME_TYPE" \
    -H "Authorization: Bearer $API_KEY" \
    https://pod.stanford.edu/organizations/princeton/uploads
  then
    echo "Upload failed for $FILE" >&2
    UPLOAD_FAILURES=$((UPLOAD_FAILURES+1))
  fi
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
#
# This script processes our Alma MARC XML full dump files (several at a time)
# and prepares the data for upload to the POD project. The upload step at the
# end of this script is currently commented out.
#
# Notice that we don't publish all records and/or all the data.
#
# This script expects the `marcli` utility to be available on the PATH
# (https://github.com/hectorcorrea/marcli/releases)
#
# Exports the publishable subset of one MARC XML file and gzips the result.
#
# Arguments:
#   $1 - MARC XML file to read (defaults to the global FILE when omitted)
#   $2 - file to write the export to (defaults to the global POD_FILE when omitted)
# Returns:
#   marcli's exit status when the export fails (the output is then left
#   uncompressed), otherwise the exit status of gzip.
process_marc_file(){
  # Use the arguments the caller passes; the globals are only a fallback for
  # callers that relied on the previous behaviour.
  local marc_file="${1:-$FILE}"
  local pod_file="${2:-$POD_FILE}"
  # Export
  #   Only records with a 035 field
  #   That have the word "OCoLC" (somewhere on the record)
  #   Exclude field 583 (private notes for RBSC items) from the output
  #   Output as XML (nicely formatted via debug=true)
  marcli -file="$marc_file" -hasFields=035 -match=OCoLC -exclude=583 -format=xml -debug=true > "$pod_file" || return
  gzip -f "$pod_file"
}
# Settings. The source directory and the POD API key may be supplied through
# the environment (ALMA_DIR, POD_API_KEY); otherwise the defaults below apply.
ALMA_DIR="${ALMA_DIR:-/Users/correah/data/fulldump_expanded}"
POD_DIR="$ALMA_DIR/pod_files_parallel"
API_KEY="${POD_API_KEY:-get-your-key-from-pod}"
# Number of files exported at the same time.
CONCURRENT=4
# Number of export jobs started in the current batch.
COUNTER=0
# Quoted so that a directory name containing spaces is not word-split.
mkdir -p "$POD_DIR"
# The glob must stay outside the quotes, otherwise it is never expanded.
# rm -f "$POD_DIR"/pod_*.gz
# Creates the new MARC files for POD from the original files from Alma,
# exporting up to $CONCURRENT files at the same time.

# PIDs of the background jobs in the current batch.
PIDS=()
# Number of background jobs that ended with a non-zero status.
FAILED=0

# Waits for every job of the current batch, counts the ones that failed
# (a bare `wait` would discard their exit status) and starts a new batch.
wait_for_batch(){
  local pid
  for pid in "${PIDS[@]}"
  do
    wait "$pid" || FAILED=$((FAILED+1))
  done
  PIDS=()
  COUNTER=0
}

for FILE in "$ALMA_DIR"/*.xml
do
  # When nothing matches, bash leaves the glob as a literal pattern; skip it
  # instead of starting a job for a non-existent "*.xml" file.
  [ -e "$FILE" ] || continue
  # Process the next file...
  COUNTER=$((COUNTER+1))
  BASE_NAME=$(basename "$FILE")
  POD_FILE="$POD_DIR/pod_$BASE_NAME"
  echo "Processing $BASE_NAME"
  process_marc_file "$FILE" "$POD_FILE" &
  PIDS+=("$!")
  if [ "$COUNTER" -eq "$CONCURRENT" ]
  then
    # ...wait
    echo "Waiting at $(date)..."
    wait_for_batch
  fi
done
if [ "${#PIDS[@]}" -gt 0 ]
then
  # ...wait for last batch (if any)
  echo "Waiting last at $(date)..."
  wait_for_batch
fi
if [ "$FAILED" -ne 0 ]
then
  echo "$FAILED file(s) could not be processed" >&2
fi
# # Uploads the files to POD | |
# # https://pod.stanford.edu/api | |
# for FILE in $POD_DIR/*.gz | |
# do | |
# echo "Uploading $FILE" | |
# MIME_TYPE="application/marcxml+xml" | |
# curl -F "upload[files][]=@$FILE;type=$MIME_TYPE" \ | |
# -H "Authorization: Bearer $API_KEY" \ | |
# https://pod.stanford.edu/organizations/princeton/uploads | |
# done |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment