Last active
April 16, 2022 20:31
-
-
Save Munksgaard/bb34994ba9be4d25bd4e8d6a5d27a777 to your computer and use it in GitHub Desktop.
A tool to get data from external sources, e.g. ERDA. An alternative to git-annex or LFS.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.external-data/rodinia/hotspot/data/1024.in https://sid.erda.dk/share_redirect/FlhwY8rtfk/rodinia/hotspot/1024.in 3ead297c7a98339297ba0fd351aa3e364b476aac4afe9fb568271e73741adf7b | |
.external-data/rodinia\ 5/hotspot/data/1024.in https://sid.erda.dk/share_redirect/FlhwY8rtfk/rodinia/hotspot/1024.in 3ead297c7a98339297ba0fd351aa3e364b476aac4afe9fb568271e73741adf7b |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
# | |
# usage: get-data.sh external-data.txt | |
# | |
# external-data.txt file must contain lines of the format: | |
# | |
# PATH URL SHA256SUM | |
# | |
# get-data.sh will attempt to download the file at URL into PATH (relative to | |
# the location of external-data.txt) after verifying that the sha256sum is | |
# identical to SHA256SUM. PATH can contain escaped spaces. | |
# Strict mode: abort on any failing command, on a failure anywhere in a
# pipeline, and on expansion of an unset variable.
set -o errexit
set -o pipefail
set -o nounset

# Exactly one argument (the listing file) is required. The usage text is a
# diagnostic, so it goes to stderr; the exit status stays 3.
if [ "$#" -ne 1 ]; then
  {
    echo "Usage: $0 FILE"
    echo "FILE must be a file containing lines of the format:"
    echo " PATH URL SHA256SUM"
    echo "$0 will attempt to download the file at URL into PATH (relative to"
    echo "the location of external-data.txt) after verifying that the sha256sum is"
    echo "identical to SHA256SUM. PATH can contain escaped spaces."
  } >&2
  exit 3
fi

# Verify the external tools used further down are available. `command -v` is
# a shell builtin, so the check does not itself depend on `which` being
# installed (a missing `which` would otherwise be misreported as a missing
# tool).
if ! command -v sha256sum > /dev/null 2>&1; then
  echo "Error: sha256sum could not be found." >&2
  exit 4
fi

if ! command -v curl > /dev/null 2>&1; then
  echo "Error: curl could not be found." >&2
  exit 5
fi
# Every PATH in the listing is resolved relative to the directory that
# contains the listing file itself.
BASEDIR=$(dirname "$1")

# Path of the in-flight temporary download; empty when there is none.
TMPFILE=""

# Remove a leftover temporary download on every exit path (curl failure under
# errexit, checksum mismatch, or normal completion).
cleanup() {
  if [ -n "$TMPFILE" ]; then
    rm -f -- "$TMPFILE"
  fi
}
trap cleanup EXIT

# Print the SHA-256 hex digest of the file given as $1. The file is fed on
# stdin so that sha256sum's escaping of unusual file names cannot alter the
# output format being parsed.
sha256_of() {
  sha256sum < "$1" | cut -d ' ' -f 1
}

# `read` is intentionally used without -r: the listing format allows "\ " in
# PATH to denote a literal space, which relies on backslash processing.
# The `|| [ -n "$OUTPUT" ]` clause keeps a final line that lacks a trailing
# newline from being silently dropped.
# shellcheck disable=SC2162
while read OUTPUT URL CHECKSUM || [ -n "$OUTPUT" ]; do
  # Skip blank lines instead of attempting a download with an empty URL.
  if [ -z "$OUTPUT" ]; then
    continue
  fi

  echo "Now processing $OUTPUT..."

  # The existence check must look at the same location the download is
  # stored in, otherwise it only works when run from BASEDIR.
  TARGET="${BASEDIR}/${OUTPUT}"

  if [ -f "$TARGET" ]; then
    COMPUTED_SUM=$(sha256_of "$TARGET")
    if [ "$COMPUTED_SUM" = "$CHECKSUM" ]; then
      echo "File exists. Skipping."
      continue
    fi
    # Never overwrite an existing file that does not match; let the user
    # decide what to do with it.
    {
      echo "Error: File exists but has invalid checksum!"
      echo "Expected $CHECKSUM, got $COMPUTED_SUM."
      echo "You can manually delete the file to get the correct version."
    } >&2
    exit 2
  fi

  # Download to a temporary file first so that a partial or corrupt download
  # never appears at the final location.
  TMPFILE=$(mktemp)
  curl --fail "$URL" --output "$TMPFILE"
  COMPUTED_SUM=$(sha256_of "$TMPFILE")

  if [ "$COMPUTED_SUM" != "$CHECKSUM" ]; then
    {
      echo "Error: Invalid checksum of downloaded file!"
      echo "Expected $CHECKSUM, got $COMPUTED_SUM."
    } >&2
    exit 1
  fi

  mkdir -p -- "$(dirname "$TARGET")"
  mv -- "$TMPFILE" "$TARGET"
  # The file has been moved into place; nothing left for cleanup to remove.
  TMPFILE=""
done < "$1"

echo "Done."
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment