Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Automating fulltext extraction in the Alma Digital Repository
#!/usr/bin/env bash
#
# Uploads one file to the Alma S3 upload area and attaches it to an existing
# digital representation.
#
# Usage: add_file.sh file-name representation-id institution [file-label]
#
# Environment:
#   ALMA_APIKEY - API key sent in the Authorization header of each Alma call.
#   Credentials for the `aws` CLI (it performs the S3 upload).
#
# Exit status: 0 on success, non-zero on a usage error or when the upload,
# the bib lookup, or the final API call fails.
#
# bash is required rather than sh: $RANDOM is not defined by POSIX sh, so
# under a strict /bin/sh every upload would land in the same folder.
set -o pipefail # let a failing curl surface through the `curl | xmllint` pipe

readonly AWS_BUCKET="na-st01.ext.exlibrisgroup.com"
readonly ALMA_API="https://api-na.hosted.exlibrisgroup.com/almaws/v1"

# Escapes the characters that would break an XML text node.
xml_escape() {
  printf '%s' "$1" | sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
}

# The institution ($3) is part of the upload path, so three arguments are
# mandatory; only the label is optional.
if [ $# -lt 3 ]; then
  echo "Usage: $(basename "$0") file-name representation-id institution [file-label]" >&2
  exit 1
fi
: "${ALMA_APIKEY:?ALMA_APIKEY must be set}"

src_file=$1
rep_id=$2
institution=$3
file_label=${4:-}

FILENAME=$(basename "$src_file")
# Random sub-folder so that equally named files from different runs do not
# overwrite each other in the upload area.
FOLDER=$((1 + RANDOM % 32767))
upload_path="$institution/upload/$FOLDER/$FILENAME"

echo "Uploading $src_file"
if ! aws s3 cp --quiet "$src_file" "s3://$AWS_BUCKET/$upload_path"; then
  echo "Upload of $src_file failed" >&2
  exit 1
fi

# Getting BIB for rep id
BIB=$(curl -s --fail \
  -H "Authorization: apikey $ALMA_APIKEY" \
  -H "Accept: application/xml" \
  "$ALMA_API/bibs?representation_id=$rep_id&view=brief" \
  | xmllint --xpath "string(/bibs/bib/mms_id)" -)
if [ -z "$BIB" ]; then
  echo "Could not determine the bib record for representation $rep_id" >&2
  exit 1
fi

echo "Adding file to representation"
payload="<representation_file><label>$(xml_escape "$file_label")</label><path>$(xml_escape "$upload_path")</path></representation_file>"
# --fail makes an HTTP error status the exit status of this script.
curl -s --fail -o /dev/null -X POST \
  -H "Authorization: apikey $ALMA_APIKEY" \
  -H "Content-type: application/xml" \
  -H "Accept: application/json" \
  --data "$payload" \
  "$ALMA_API/bibs/$BIB/representations/$rep_id/files"
#!/usr/bin/env bash
#
# Creates a new representation on a bib record, attaches every file found in
# a directory to it, runs the fulltext-extraction job on those files through
# a temporary itemized set, and finally opens the representation in the viewer.
#
# Usage: fulltext.sh mms_id directory
#
# Environment:
#   ALMA_APIKEY - API key sent in the Authorization header of each Alma call.
#
# Expects ./add_file.sh, ./run_job.sh and job.json in the working directory;
# job.json is rewritten in place with the id of the temporary set.
#
# bash is required rather than sh: the script uses arrays and [[ ]].
set -o pipefail # make $? after `curl | jq` reflect a curl failure as well

JOB_ID=M50157
INSTITUTION='TR_INTEGRATION_INST'
ALMA_API="https://api-na.hosted.exlibrisgroup.com/almaws/v1"

if [ $# -lt 2 ]; then
  echo "Usage: $(basename "$0") mms_id directory" >&2
  exit 1
fi
: "${ALMA_APIKEY:?ALMA_APIKEY must be set}"

mms_id=$1
src_dir=${2%/}

# Removes the temporary set created for the job run.
delete_set() {
  echo "Deleting set $SET_ID"
  curl -s -X DELETE -H "Authorization: apikey $ALMA_APIKEY" \
    "$ALMA_API/conf/sets/$SET_ID"
}

# nullglob: an empty directory must yield an empty list, not the literal
# pattern "dir/*".
shopt -s nullglob
files=("$src_dir"/*)
shopt -u nullglob
if [ "${#files[@]}" -eq 0 ]; then
  echo "No files found in $2" >&2
  exit 1
fi

# The representation label is the first file's name without its extension
# and without trailing digits (e.g. "scan_001.tif" -> "scan_").
label=$(basename "${files[0]}")
label=${label%.*}
strip_digits_re='^(.*[^0-9])'
if [[ $label =~ $strip_digits_re ]]; then
  label=${BASH_REMATCH[1]}
else
  label=""
fi

echo "Adding representation to mms_id $mms_id"
# Build the request with jq so quotes or backslashes in the label cannot
# produce invalid JSON.
rep_request=$(jq -n -c --arg label "$label" \
  '{library: {value: "MAIN"}, is_remote: "false", label: $label, usage_type: {value: "PRESERVATION_MASTER"}}')
REP_ID=$(curl -s --fail -X POST \
  -H "Authorization: apikey $ALMA_APIKEY" \
  -H "Content-type: application/json" \
  -H "Accept: application/json" \
  --data "$rep_request" \
  "$ALMA_API/bibs/$mms_id/representations" | jq -r '.id')
res=$?
if [ "$res" -ne 0 ] || [ -z "$REP_ID" ] || [ "$REP_ID" = "null" ]; then
  echo "Creating the representation failed (return code $res)" >&2
  exit 1
fi

echo "Adding files to representation $REP_ID"
i=1
for file in "${files[@]}"; do
  ./add_file.sh "$file" "$REP_ID" "$INSTITUTION" "Page $i" \
    || echo "Warning: adding $file failed" >&2
  i=$((i + 1))
done

echo "Creating new set"
SET_ID=$(curl -s --fail -X POST \
  -H "Authorization: apikey $ALMA_APIKEY" \
  -H "Content-type: application/json" \
  -H "Accept: application/json" \
  --data '{"name": "Digital files for fulltext","type": {"value": "ITEMIZED"},"content": {"value": "FILE"}}' \
  "$ALMA_API/conf/sets" | jq -r '.id')
res=$?
if [ "$res" -ne 0 ]; then
  echo "HTTP request failed with return code $res" >&2
  exit "$res"
fi
if [ -z "$SET_ID" ] || [ "$SET_ID" = "null" ]; then
  echo "The set was created but no set id was returned" >&2
  exit 1
fi

echo "Adding files to set"
MEMBERS=$(curl -s --fail \
  -H "Authorization: apikey $ALMA_APIKEY" \
  -H "Accept: application/json" \
  "$ALMA_API/bibs/$mms_id/representations/$REP_ID/files" \
  | jq -c '{members:{member: (.representation_file | map({id:.pid}))}}')
res=$?
if [ "$res" -ne 0 ] || [ -z "$MEMBERS" ]; then
  echo "Could not list the files of representation $REP_ID (return code $res)" >&2
  delete_set
  exit 1
fi
curl -s -o /dev/null -X POST \
  -H "Authorization: apikey $ALMA_APIKEY" \
  -H "Content-type: application/json" \
  -H "Accept: application/json" \
  --data "$MEMBERS" \
  "$ALMA_API/conf/sets/$SET_ID?op=replace_members"

echo "Running job"
jq -c --arg set_id "$SET_ID" \
  '.parameter |= map(if .name.value == "set_id" then (.value = $set_id) else . end)' \
  job.json > job.tmp && mv job.tmp job.json
# Remember the job result but keep going: the temporary set must be removed
# whether or not the job succeeded.
job_rc=0
./run_job.sh "$JOB_ID" job.json || job_rc=$?

delete_set

echo "Opening new representation"
viewer_url="https://na01.alma.exlibrisgroup.com/view/BookReaderViewer/$INSTITUTION/$REP_ID"
if command -v open > /dev/null 2>&1; then
  open "$viewer_url"
else
  # `open` is not available on every platform; print the URL instead.
  echo "$viewer_url"
fi
exit "$job_rc"
{
"parameter": [
{
"name": {
"value": "task_ExtractFulltext_overwriteProvided"
},
"value": "false"
},
{
"name": {
"value": "set_id"
},
"value": "5617966300000561"
},
{
"name": {
"value": "job_name"
},
"value": "Extract Fulltext - via API - Digital files for full text extraction"
}
]
}
#!/usr/bin/env bash
#
# Submits an Alma job and polls its job instance until it finishes.
#
# Usage: run_job.sh job-id parameters-file
#
# Environment:
#   ALMA_APIKEY - API key sent in the Authorization header of each Alma call.
#
# Side effect: the most recent job-instance response is written to
# job_instance.xml in the working directory.
#
# Exit status: 0 when the final status is COMPLETED_SUCCESS; non-zero on a
# usage error, a failed request, or any other final status.
#
# bash is required rather than sh: the polling loop uses [[ ]] pattern matching.
set -o pipefail # make $? after `curl | jq` reflect a curl failure as well

# Give up after this many consecutive failed status requests instead of
# polling forever.
readonly MAX_FAILED_POLLS=5

if [ $# -lt 2 ]; then
  echo "Usage: $(basename "$0") job-id parameters-file" >&2
  exit 1
fi
: "${ALMA_APIKEY:?ALMA_APIKEY must be set}"

echo "Submitting the job"
JOB_INSTANCE_URL=$(curl -s --fail -X POST \
  -H "Authorization: apikey $ALMA_APIKEY" \
  -H "Content-type: application/json" \
  -H "Accept: application/json" \
  --data "@$2" \
  "https://api-na.hosted.exlibrisgroup.com/almaws/v1/conf/jobs/$1?op=run" \
  | jq --raw-output '.additional_info.link')
res=$?
if [ "$res" -ne 0 ]; then
  echo "HTTP request failed with return code $res" >&2
  exit "$res"
fi
if [ -z "$JOB_INSTANCE_URL" ] || [ "$JOB_INSTANCE_URL" = "null" ]; then
  echo "The job was submitted but no job instance link was returned" >&2
  exit 1
fi

echo "Checking the job status at $JOB_INSTANCE_URL"
# Start from a known state so a JOB_STATUS inherited from the environment
# cannot end the loop before the first poll.
JOB_STATUS=""
failed_polls=0
until [[ "$JOB_STATUS" == "COMPLETED"* ]]; do
  sleep 3
  if ! curl -s --fail \
    -H "Authorization: apikey $ALMA_APIKEY" \
    -H "Accept: application/xml" \
    "$JOB_INSTANCE_URL" > job_instance.xml; then
    failed_polls=$((failed_polls + 1))
    if [ "$failed_polls" -ge "$MAX_FAILED_POLLS" ]; then
      echo "Giving up after $failed_polls failed status requests" >&2
      exit 1
    fi
    echo "Status request failed; retrying" >&2
    continue
  fi
  failed_polls=0
  JOB_PROGRESS=$(xmllint --xpath '/job_instance/progress/text()' job_instance.xml)
  JOB_STATUS=$(xmllint --xpath '/job_instance/status/text()' job_instance.xml)
  echo "Job progress: $JOB_PROGRESS; Job status: $JOB_STATUS"
  # NOTE(review): assumes Alma reports FAILED and *_ABORTED as final statuses
  # that never turn into COMPLETED_* -- confirm against the job instance
  # status code table. Without this the loop would never end for such jobs.
  case "$JOB_STATUS" in
    FAILED* | *ABORTED*) break ;;
  esac
done

if [ "$JOB_STATUS" = "COMPLETED_SUCCESS" ]; then
  exit 0
else
  echo "Job not completed successfully."
  exit 1
fi
@jweisman

This comment has been minimized.

Copy link
Owner Author

jweisman commented May 2, 2019

Expects the following environment variables:

  • ALMA_APIKEY
  • AWS_ACCESS_KEY_ID
  • AWS_SECRET_ACCESS_KEY

Run fulltext.sh with two parameters, the MMS ID and the directory that contains the files, from the directory that holds add_file.sh, run_job.sh, and job.json:

./fulltext.sh 99490541500561 files/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.