Skip to content

Instantly share code, notes, and snippets.

@tkafka
Last active May 29, 2016 20:14
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tkafka/b7954c6d5ff7417382f0946b4757478b to your computer and use it in GitHub Desktop.
Save tkafka/b7954c6d5ff7417382f0946b4757478b to your computer and use it in GitHub Desktop.
Apifier result downloader. Usage: bash apifier-get-results <execution_id>
#!/usr/bin/env bash
#
# Apifier result downloader.
#
# Usage: bash apifier-get-results <execution_id>
#
# Results are written to ./result-<execution_id>.<format> in the current
# directory; two scratch files (./scrape-headers.txt and
# ./scrape-result.<format>) are used while downloading.

# The execution id is the single, mandatory argument.
if [ -z "$1" ]; then
  echo "Usage: bash apifier-get-results <execution_id>"
  exit 1
fi

FORMAT="csv"
EXECUTION_ID="$1"

# Base URL; the paging parameters (&offset=...&limit=...) are appended to it
# for every request.
URL="https://api.apifier.com/v1/execs/$EXECUTION_ID/results?format=$FORMAT&simplified=1"

RESULT_FILENAME="./result-$EXECUTION_ID.$FORMAT"
TMP_HEADERS="./scrape-headers.txt"
TMP_RESULT="./scrape-result.$FORMAT"

OFFSET=0
LIMIT=100000 # max 100000

# Remove the result file of a previous run. The name contains the
# user-supplied execution id, so it must be quoted: unquoted, an id with
# spaces or glob characters would be word-split/expanded and rm could delete
# unrelated files.
rm -f -- "$RESULT_FILENAME"
# Delete the two scratch files (response headers and response body).
# Globals: TMP_HEADERS, TMP_RESULT (read)
# Returns: exit status of the last rm; missing files are not an error (-f).
clean_up() {
  local scratch_file
  for scratch_file in "$TMP_HEADERS" "$TMP_RESULT"; do
    rm -f "$scratch_file"
  done
}
# Interrupt handler: removes the scratch files via clean_up, tells the user
# where the results downloaded so far are, and terminates with status 1.
# Globals: RESULT_FILENAME (read)
clean_up_and_exit() {
  clean_up
  printf '%s\n' "Interrupted with Ctrl+C, results so far are in $RESULT_FILENAME. Bye!"
  exit 1
}
# On Ctrl+C remove the scratch files and tell the user where the partial
# results are.
trap clean_up_and_exit INT

# Print the value of response header $1 as found in "$TMP_HEADERS".
# The header name is matched case-insensitively and only at the start of a
# line (HTTP/2 responses carry lowercase names, and other headers may mention
# the name in their value). Trailing CR and surrounding blanks are stripped.
# If the header occurs more than once the last occurrence wins; prints nothing
# when the header is absent.
pagination_header() {
  awk -v wanted="$1" '
    BEGIN { wanted = tolower(wanted) }
    {
      sub(/\r$/, "")
      sep = index($0, ":")
      if (sep == 0) next
      if (tolower(substr($0, 1, sep - 1)) != wanted) next
      value = substr($0, sep + 1)
      gsub(/^[ \t]+|[ \t]+$/, "", value)
      found = value
    }
    END { printf "%s", found }
  ' "$TMP_HEADERS"
}

# Succeed only if $1 is a non-empty string of decimal digits.
is_non_negative_integer() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
    *) return 0 ;;
  esac
}

# Download page after page until the server returns fewer items than the
# page limit, appending every page to "$RESULT_FILENAME".
DONE=0
while [ "$DONE" -eq 0 ]; do
  URL_WITH_OFFSET="$URL&offset=$OFFSET&limit=$LIMIT"
  echo "Scraping $EXECUTION_ID from offset $OFFSET (limit $LIMIT): $URL_WITH_OFFSET ..."

  # Headers go to $TMP_HEADERS, the body to $TMP_RESULT, and the final HTTP
  # status code is printed on stdout by -w, which works for any protocol
  # version (grepping the header dump for "HTTP/1.1" yields nothing for an
  # HTTP/2 response, and several values when there are multiple status lines).
  # To suppress the progress bar add: -Ss
  RESULT_HTTP_HEADER="$(curl -D "$TMP_HEADERS" -o "$TMP_RESULT" -w '%{http_code}' "$URL_WITH_OFFSET")"
  CURL_RESULT="$?"
  if [ "$CURL_RESULT" -eq 0 ]; then
    echo " ... ok."
  else
    echo " ... ended with curl error $CURL_RESULT, quitting ..."
    clean_up
    echo "Cleaned up, bye."
    exit 1
  fi

  # String comparison on purpose: a numeric test on an empty or malformed
  # value fails with a usage error and would skip this error branch.
  if [ "$RESULT_HTTP_HEADER" != "200" ]; then
    echo "Server returned error $RESULT_HTTP_HEADER, quitting ..."
    clean_up
    echo "Cleaned up, bye."
    exit 1
  fi

  # Append this page to the overall result.
  # NOTE(review): pages are appended verbatim; if the API repeats the CSV
  # header row on every page it will appear once per page — confirm.
  cat "$TMP_RESULT" >> "$RESULT_FILENAME"

  # Read the pagination headers of this response.
  APIFIER_COUNT="$(pagination_header "X-Apifier-Pagination-Count")"
  APIFIER_OFFSET="$(pagination_header "X-Apifier-Pagination-Offset")"
  APIFIER_LIMIT="$(pagination_header "X-Apifier-Pagination-Limit")"
  echo "Count: $APIFIER_COUNT, offset: $APIFIER_OFFSET, limit: $APIFIER_LIMIT"

  # Without valid numbers the next offset cannot be computed and the
  # termination test below could never succeed, so stop instead of looping.
  for header_value in "$APIFIER_COUNT" "$APIFIER_OFFSET" "$APIFIER_LIMIT"; do
    if ! is_non_negative_integer "$header_value"; then
      echo "Missing or malformed X-Apifier-Pagination-* headers, quitting ..."
      clean_up
      echo "Cleaned up, bye."
      exit 1
    fi
  done

  # 10# forces base 10 so a value with leading zeros is not read as octal.
  OFFSET="$((10#$APIFIER_OFFSET + 10#$APIFIER_COUNT))"

  # Fewer items than the page limit means this was the last page.
  if [ "$APIFIER_COUNT" -lt "$APIFIER_LIMIT" ]; then
    echo "That's all, so far, saved into $RESULT_FILENAME. Bye!"
    DONE=1
  fi

  # Remove the scratch files of this page.
  clean_up
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment