Skip to content

Instantly share code, notes, and snippets.

@ianfieldhouse
Created May 8, 2015 10:39
Show Gist options
  • Save ianfieldhouse/20e32f7262108f457803 to your computer and use it in GitHub Desktop.
Save ianfieldhouse/20e32f7262108f457803 to your computer and use it in GitHub Desktop.
check geonetwork csw harvest
#!/bin/bash
# Release identifier of this script.
SCRIPT_VERSION=1.0.5

# Location of the CSW service.
HOST='http://scotsdi.edina.ac.uk'
ENDPOINT='/geonetwork/srv/eng/csw'

# Query-string parameter values sent with every request.
SERVICE='CSW'
REQUEST='GetRecords'
CONSTRAINT_LANGUAGE='CQL_TEXT'
TYPE_NAMES='csw%3ARecord'   # already URL-encoded ("csw:Record")
RESULT_TYPE='results'
VERSION='2.0.2'
ESN='brief'

# Default paging step.
OFFSET=20

# Assemble the shared request URL one query parameter at a time.
BASE_URL="${HOST}${ENDPOINT}"
BASE_URL+="?service=${SERVICE}"
BASE_URL+="&request=${REQUEST}"
BASE_URL+="&constraintLanguage=${CONSTRAINT_LANGUAGE}"
BASE_URL+="&typeNames=${TYPE_NAMES}"
BASE_URL+="&resultType=${RESULT_TYPE}"
BASE_URL+="&version=${VERSION}"
# Print a short usage summary to stderr and terminate with exit status 1.
usage() {
  # The script path is passed as a %s argument so that any backslashes in it
  # are printed literally; only the format string's own \n escapes are
  # interpreted.
  printf 'Usage: %s [-o <int|default=20>]\ne.g.:\n%s\n%s -o 50\n' \
    "$0" "$0" "$0" >&2
  exit 1
}
# Succeed only when $1 is a decimal integer >= 1 written without leading
# zeros (a leading zero would be read as octal in shell arithmetic).
is_positive_int() {
  [[ $1 =~ ^[1-9][0-9]*$ ]]
}

# Parse command-line options:
#   -o <int>  paging step stored in OFFSET; must be a positive integer
#   -v        print the script version and exit
# Any other option, or -o without a value, prints the usage text and exits.
while getopts ":o:v" opt; do
  case "$opt" in
    o)
      # OFFSET is used as a loop increment later in the script, so a value
      # of 0 would never terminate and a non-numeric value would break the
      # arithmetic. Reject both up front.
      if ! is_positive_int "$OPTARG"; then
        echo "Invalid offset '$OPTARG': expected a positive integer" >&2
        usage
      fi
      OFFSET=$OPTARG
      ;;
    v)
      echo "version $SCRIPT_VERSION"
      exit 0
      ;;
    *)
      usage
      ;;
  esac
done
# Drop the options that were consumed, leaving any positional arguments.
shift $((OPTIND - 1))
# Harvest record identifiers from the CSW endpoint in pages of OFFSET
# records, then report how many distinct identifiers were returned so the
# figure can be compared with the record count the service advertises.
printf '%s harvest from %s%s with offset of %s\n' \
  "$SERVICE" "$HOST" "$ENDPOINT" "$OFFSET"

# Private temporary file to hold identifiers. mktemp avoids the predictable
# /tmp name, and the EXIT trap removes the file on every exit path.
FILE=$(mktemp "${TMPDIR:-/tmp}/identifiers.XXXXXX") || {
  echo "Unable to create temporary file" >&2
  exit 1
}
trap 'rm -f -- "$FILE"' EXIT

# Determine total number of records to harvest. The URL is quoted because it
# contains '?' and '&'; -f makes curl fail on HTTP errors instead of handing
# an error page to sed.
NUM_RECORDS=$(curl -fsS "$BASE_URL" \
  | sed -n 's/.*numberOfRecordsMatched="\([0-9]*\)".*/\1/p' \
  | head -n 1)

# An empty or non-numeric count (request failed, unexpected response) would
# otherwise produce an arithmetic syntax error in the loop header below.
if ! [[ $NUM_RECORDS =~ ^[0-9]+$ ]]; then
  echo "Unable to determine number of records from $HOST$ENDPOINT" >&2
  exit 1
fi
NUM_RECORDS=$((10#$NUM_RECORDS))   # force base 10 in case of leading zeros

printf '\nNumber of records to harvest: %s\n\n' "$NUM_RECORDS"

# Save record identifiers to the temporary file, one page per request.
RECORDS_LEFT=$NUM_RECORDS
for (( i = 1; i <= NUM_RECORDS; i += OFFSET )); do
  # The final page may hold fewer than OFFSET records.
  if [ "$RECORDS_LEFT" -gt "$OFFSET" ]; then
    NEXT_RECORD_BATCH=$OFFSET
  else
    NEXT_RECORD_BATCH=$RECORDS_LEFT
  fi
  echo "Harvesting $NEXT_RECORD_BATCH records from $i"

  CURL_URL="$BASE_URL&esn=$ESN&maxrecords=$OFFSET&startposition=$i"
  if RESPONSE=$(curl -fsS "$CURL_URL"); then
    # -o emits one line per identifier element, so the count stays correct
    # even when the server returns several elements on a single line.
    grep -o '<dc:identifier[^>]*>[^<]*' <<<"$RESPONSE" >> "$FILE"
  else
    # Keep going: a missing page shows up as a count mismatch at the end.
    echo "Warning: request failed for records starting at $i" >&2
  fi

  RECORDS_LEFT=$((RECORDS_LEFT - OFFSET))
done

# Inform user of number of unique identifiers. The arithmetic expansion
# strips the padding some wc implementations add around the number.
UNIQUE_IDENTIFIERS=$(( $(sort -u -- "$FILE" | wc -l) ))
printf '\nNumber of unique identifiers: %s\n' "$UNIQUE_IDENTIFIERS"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment