Last active
December 18, 2015 02:39
-
-
Save wookietreiber/5712654 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash

# ------------------------------------------------------------------------------
# Grid Engine Submit Parameters
# ------------------------------------------------------------------------------
# Lines starting with "#$" are read by qsub as submit options. Keep them free of
# trailing text: everything after "#$" is passed on as option arguments.

#$ -N extract-split-tar-gz
#$ -S /bin/bash
#$ -pe smp 2-
#$ -l h_rt=120:00:00
#$ -cwd
#$ -o /work/$USER/$JOB_NAME-$JOB_ID.out
#$ -j y

# ------------------------------------------------------------------------------
# modules to load
# ------------------------------------------------------------------------------
# Only attempt to load modules when the modules init script is readable; if it
# is not, the script continues and relies on the tools already being available
# in the environment.

if [[ -r /etc/profile.d/000-modules.sh ]] ; then
  source /etc/profile.d/000-modules.sh

  module load parallel
  module load pigz
fi
# ------------------------------------------------------------------------------ | |
# command line argument processing / configuration | |
# ------------------------------------------------------------------------------ | |
# function to display usage
#
# Prints the help text to stdout. Takes no arguments. $0 is expanded inside the
# (unquoted) here-document so the example shows the path the script was run as.
usage() { cat << EOF
Usage:
  cd /path/to/target/directory
  qsub $0 [-v] [-c hash] /path/to/dataset.tar.gz
Description:
  The script will extract the archive to the current working directory, so change
  to the directory where you want the extracted files to be before you submit the
  script!
  /path/to/dataset.tar.gz    path to the archived dataset, use its prefix
                             (without the -part suffix)
  -c hash                    hash to use, one of
                             md5, sha1, sha224, sha256, sha384, sha512
                             default is md5
  -v | --verbose             output every command that is executed
  -h | --help                shows this help text
EOF
}
# set arguments to nothing, so values inherited from the environment are ignored
unset ARCHIVE HASH VERBOSE

# parse parameters: options come first, the first non-option word ends parsing
while true ; do
  case "$1" in
    -h|--help)
      usage
      exit 0
      ;;
    -v|--verbose)
      VERBOSE=yes
      shift
      ;;
    -c)
      # -c needs a value; without this check a trailing "-c" would be silently
      # accepted and the default hash used instead
      if (( $# < 2 )) ; then
        echo "[$(date)] [ERROR] Option -c requires an argument!"
        usage
        exit 1
      fi
      HASH=$2
      shift 2
      ;;
    *)
      break
      ;;
  esac
done

ARCHIVE="$1"
HASH=${HASH:-md5}

# checking existence of the ARCHIVE argument
if [[ -z $ARCHIVE ]] ; then
  usage
  exit 1
fi

# checking ARCHIVE argument: its parent directory must exist ...
if [[ ! -e $(dirname -- "$ARCHIVE") ]] ; then
  echo "[$(date)] [ERROR] The parent directory of your target (\"$ARCHIVE\" -> \"$(dirname -- "$ARCHIVE")\") does not exist!"
  exit 1
fi

# ... and at least one "$ARCHIVE.part-*" file must be present. Without nullglob
# an unmatched glob stays literal, so testing the first element with -e is
# enough and avoids parsing ls output.
ARCHIVE_PARTS=( "$ARCHIVE".part-* )
if [[ ! -e ${ARCHIVE_PARTS[0]} ]] ; then
  echo "[$(date)] [ERROR] $ARCHIVE* does not exist!"
  exit 1
fi

# checking HASH argument
case "$HASH" in
  md5|sha1|sha224|sha256|sha384|sha512)
    ;;
  *)
    echo "[$(date)] [ERROR] Hash must be one of md5, sha1, sha224, sha256, sha384, sha512!"
    # explicit non-zero status: a bare "exit" would return echo's status (0)
    exit 1
    ;;
esac

# checksum command
HASH_CMD=${HASH}sum

# checksum file
CHECKSUMS=${ARCHIVE}.${HASH_CMD}

# internals checksum file
INTERNAL_CHECKSUMS=${ARCHIVE}.${HASH_CMD}-internal
# --------------------------------------------------------------------------------------------------- | |
# bailout behaviour | |
# --------------------------------------------------------------------------------------------------- | |
# Report a failed command on stdout and terminate the script with status 1.
#
# $1 line number
# $2 exit status
bailout() {
  # fall back to "?" so a missing argument still yields a readable message
  local line=${1:-?}
  local status=${2:-?}

  echo "[$(date)] [ERROR] Last command around line $line failed with exit status \"$status\". Bailing out. Please cleanup and try again."
  exit 1
}
# --------------------------------------------------------------------------------------------------- | |
# pipe bailout | |
# --------------------------------------------------------------------------------------------------- | |
# Check every stage of the pipeline that ran immediately before this call and
# bail out on the first stage with a non-zero exit status. Must be the very
# next command after the pipeline, on the line directly below it.
#
# $1 line number of the call; the pipeline is assumed to be on the line before
pipe_bailout() {
  # snapshot first: PIPESTATUS is overwritten by the next command that runs
  local -a stage_status=( "${PIPESTATUS[@]}" )
  local status

  for status in "${stage_status[@]}" ; do
    if [[ $status != 0 ]] ; then
      bailout "$(( ${1:-1} - 1 ))" "$status"
    fi
  done

  return 0
}
# --------------------------------------------------------------------------------------------------- | |
# trap bailout at error | |
# --------------------------------------------------------------------------------------------------- | |
# From here on, a failing command outside of a condition triggers bailout with
# the current line number and the failing exit status.
trap 'bailout $LINENO $?' ERR

# ---------------------------------------------------------------------------------------------------
# input archive verification
# ---------------------------------------------------------------------------------------------------

echo "[$(date)] [INFO] Input archive verification ..."

if [[ -r $CHECKSUMS ]] ; then
  # The check runs from the archive's directory. $CHECKSUMS lives in that same
  # directory, so it is addressed by its base name there; this keeps a relative
  # $ARCHIVE path working after the directory change.
  archive_dir=$(dirname -- "$ARCHIVE")
  checksums_name=$(basename -- "$CHECKSUMS")

  if [[ -n $VERBOSE ]] ; then
    echo "[$(date)] [DEBUG] (cd $archive_dir && parallel --jobs ${NSLOTS:-1} --halt-on-error 2 \"$HASH_CMD -c --status <<< {}\" :::: $checksums_name)"
  fi

  # subshell: the working directory of the script itself is left untouched, and
  # a failing cd or parallel makes the subshell fail, which fires the ERR trap
  (
    cd -- "$archive_dir" &&
      parallel --jobs "${NSLOTS:-1}" --halt-on-error 2 "$HASH_CMD -c --status <<< {}" :::: "$checksums_name"
  )

  echo "[$(date)] [SUCCESS] The input archive has been verified."
else
  echo "[$(date)] [WARNING] No input archive verification, since $CHECKSUMS is not readable."
fi

# ---------------------------------------------------------------------------------------------------
# extraction
# ---------------------------------------------------------------------------------------------------

echo "[$(date)] [INFO] Archive extraction (will not overwrite existing files) ..."

if [[ -n $VERBOSE ]] ; then
  echo "[$(date)] [DEBUG] for f in $ARCHIVE.part-* ; do dd if=\$f bs=1M 2> /dev/null ; done | unpigz -p ${NSLOTS:-1} | tar x -k"
fi

# Concatenate all parts in glob order, decompress, and unpack into the current
# directory. The ERR trap only sees the status of the last stage (tar), so
# pipe_bailout on the next line checks the earlier stages as well.
for f in "$ARCHIVE".part-* ; do dd if="$f" bs=1M 2> /dev/null ; done | unpigz -p "${NSLOTS:-1}" | tar x -k
pipe_bailout $LINENO

echo "[$(date)] [SUCCESS] The archive has been extracted."

# ---------------------------------------------------------------------------------------------------
# verification of the extracted files
# ---------------------------------------------------------------------------------------------------

echo "[$(date)] [INFO] Verification of the extracted files ..."

if [[ -r $INTERNAL_CHECKSUMS ]] ; then
  if [[ -n $VERBOSE ]] ; then
    echo "[$(date)] [DEBUG] parallel --jobs ${NSLOTS:-1} --halt-on-error 2 \"$HASH_CMD -c --status <<< {}\" :::: $INTERNAL_CHECKSUMS"
  fi

  # runs from the current (extraction) directory
  parallel --jobs "${NSLOTS:-1}" --halt-on-error 2 "$HASH_CMD -c --status <<< {}" :::: "$INTERNAL_CHECKSUMS"

  echo "[$(date)] [SUCCESS] The extracted files have been verified."
else
  echo "[$(date)] [WARNING] No verification of the extracted files, since $INTERNAL_CHECKSUMS is not readable."
fi

# ---------------------------------------------------------------------------------------------------
# final status message
# ---------------------------------------------------------------------------------------------------

echo "[$(date)] [SUCCESS] The archive has been extracted and verified."
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment