Skip to content

Instantly share code, notes, and snippets.

@wookietreiber
Last active December 18, 2015 02:39
Show Gist options
  • Save wookietreiber/5712654 to your computer and use it in GitHub Desktop.
Save wookietreiber/5712654 to your computer and use it in GitHub Desktop.
#!/bin/bash
# ------------------------------------------------------------------------------
# Grid Engine Submit Parameters
# ------------------------------------------------------------------------------
#$ -N extract-split-tar-gz
#$ -S /bin/bash
#$ -pe smp 2-
#$ -l h_rt=120:00:00
#$ -cwd
#$ -o /work/$USER/$JOB_NAME-$JOB_ID.out
#$ -j y
# ------------------------------------------------------------------------------
# modules to load
# ------------------------------------------------------------------------------
# Pull in the environment-modules setup when this host provides it, then load
# the tools used further down (parallel for the checksum runs, pigz for unpigz).
if [[ -r /etc/profile.d/000-modules.sh ]] ; then
  source /etc/profile.d/000-modules.sh
  module load parallel
  module load pigz
fi
# ------------------------------------------------------------------------------
# command line argument processing / configuration
# ------------------------------------------------------------------------------
# Print the help text to stdout.
# Expands $0 so the synopsis shows the name the script was started with.
# The body is indented with spaces on purpose: it is emitted verbatim and the
# indentation is what separates each option from its description.
usage() { cat << EOF
Usage:
    cd /path/to/target/directory
    qsub $0 [-v] [-c hash] /path/to/dataset.tar.gz

Description:
    The script will extract the archive to the current working directory, so
    change to the directory where you want the extracted files to be before
    you submit the script!

    /path/to/dataset.tar.gz   path to the archived dataset, use its prefix
                              (without the .part-* suffix)
    -c hash                   hash to use, one of
                              md5, sha1, sha224, sha256, sha384, sha512
                              default is md5
    -v | --verbose            output every command that is executed
    -h | --help               shows this help text
EOF
}
# Start from a clean slate so previously set values cannot leak in.
unset ARCHIVE HASH VERBOSE

# Consume leading options; the first word that is not a known option ends the scan.
while (( $# > 0 )) ; do
  case "$1" in
    -h | --help)
      usage
      exit
      ;;
    -v | --verbose)
      VERBOSE=yes
      shift
      ;;
    -c)
      # The value follows the flag; drop both words.
      HASH=$2
      shift
      shift
      ;;
    *)
      break
      ;;
  esac
done

# The first remaining word is the archive prefix; md5 is the fallback hash.
ARCHIVE=$1
: "${HASH:=md5}"
# Checking existence of the ARCHIVE argument: without it there is nothing to
# extract, so show the help text and fail.
if [[ -z "$ARCHIVE" ]] ; then
  usage
  exit 1
fi

# Checking the ARCHIVE argument: its parent directory has to exist.
# Everything is quoted so that a path containing whitespace stays one word.
if [[ ! -e "$(dirname -- "$ARCHIVE")" ]] ; then
  echo "[$(date)] [ERROR] The parent directory of your target (\"$ARCHIVE\" -> \"$(dirname -- "$ARCHIVE")\") does not exist!"
  exit 1
fi

# At least one "<ARCHIVE>.part-*" chunk has to be present. The glob is expanded
# by the shell itself instead of parsing the output of ls; when nothing matches,
# the array holds the literal pattern, which does not exist as a file.
archive_parts=( "$ARCHIVE".part-* )
if [[ ! -e "${archive_parts[0]}" ]] ; then
  echo "[$(date)] [ERROR] $ARCHIVE* does not exist!"
  exit 1
fi
# Checking the HASH argument: only the algorithms listed in the usage text are
# accepted, because the name is later turned into the "<hash>sum" command.
case "$HASH" in
  md5 | sha1 | sha224 | sha256 | sha384 | sha512) ;;
  *)
    echo "[$(date)] [ERROR] Hash must be one of md5, sha1, sha224, sha256, sha384, sha512!"
    # A bare "exit" would hand back the status of the echo above (0) and make
    # the rejected job look successful to the scheduler.
    exit 1
    ;;
esac
# Tool that computes and checks the selected hash, e.g. md5 -> md5sum.
HASH_CMD="${HASH}sum"
# Checksum list used for the input archive verification.
CHECKSUMS="${ARCHIVE}.${HASH_CMD}"
# Checksum list used for the verification of the extracted files.
INTERNAL_CHECKSUMS="${CHECKSUMS}-internal"
# ---------------------------------------------------------------------------------------------------
# bailout behaviour
# ---------------------------------------------------------------------------------------------------
# Report a failed command and terminate the script.
#   $1  line number near the failing command
#   $2  exit status of the failing command
# Always exits with status 1.
bailout() {
  local line=$1 status=$2
  echo "[$(date)] [ERROR] Last command around line ${line} failed with exit status \"${status}\". Bailing out. Please cleanup and try again."
  exit 1
}
# ---------------------------------------------------------------------------------------------------
# pipe bailout
# ---------------------------------------------------------------------------------------------------
# Check every stage of the pipeline that ran immediately before this call.
#   $1  line number of the call site; the pipeline is expected on the line
#       above, hence the "- 1" in the report
# Passes the first non-zero stage status to bailout; returns 0 otherwise.
pipe_bailout() {
  # Must be the very first command: any other command would overwrite
  # PIPESTATUS with its own status before it could be read.
  local -a stage_status=("${PIPESTATUS[@]}")
  local status
  for status in "${stage_status[@]}" ; do
    if [[ "$status" != "0" ]] ; then
      bailout "$(( $1 - 1 ))" "$status"
    fi
  done
  return 0
}
# ---------------------------------------------------------------------------------------------------
# trap bailout at error
# ---------------------------------------------------------------------------------------------------
# From here on, a failing command hands its line number and status to bailout.
trap 'bailout "${LINENO}" "$?"' ERR
# ---------------------------------------------------------------------------------------------------
# input archive verification
# ---------------------------------------------------------------------------------------------------
echo "[$(date)] [INFO] Input archive verification ..."
if [[ -r "$CHECKSUMS" ]] ; then
  if [[ -n $VERBOSE ]] ; then
    echo "[$(date)] [DEBUG] parallel --jobs ${NSLOTS:-1} --halt-on-error 2 \"$HASH_CMD -c --status <<< {}\" :::: $CHECKSUMS"
  fi
  # The check runs from the archive directory, one checksum line per parallel
  # job. A subshell keeps the directory change local, so no "cd back" is needed.
  # The checksum list is addressed by its base name: it sits next to the archive
  # (CHECKSUMS is ARCHIVE plus a suffix), and a relative path would no longer
  # resolve after the cd. A non-zero status of the subshell trips the ERR trap.
  (
    cd "$(dirname -- "$ARCHIVE")" || exit 1
    parallel --jobs "${NSLOTS:-1}" --halt-on-error 2 "$HASH_CMD -c --status <<< {}" :::: "$(basename -- "$CHECKSUMS")"
  )
  echo "[$(date)] [SUCCESS] The input archive has been verified."
else
  echo "[$(date)] [WARNING] No input archive verification, since $CHECKSUMS is not readable."
fi
# ---------------------------------------------------------------------------------------------------
# extraction
# ---------------------------------------------------------------------------------------------------
echo "[$(date)] [INFO] Archive extraction (will not overwrite existing files) ..."
if [[ -n $VERBOSE ]] ; then
  # \$f is escaped so the log shows the loop as written instead of an empty value.
  echo "[$(date)] [DEBUG] for f in $ARCHIVE.part-* ; do dd if=\$f bs=1M 2> /dev/null ; done | unpigz -p ${NSLOTS:-1} | tar x -k"
fi
# Stream all parts, in glob order, through unpigz into tar; -k makes tar keep
# files that already exist. The loop runs in the pipeline's own subshell, so
# "exit 1" only ends the feeding stage when a part cannot be read instead of
# silently continuing with the next part.
# The ERR trap only sees the status of the last stage (tar); pipe_bailout on
# the line right after the pipeline inspects the other stages as well.
for f in "$ARCHIVE".part-* ; do dd if="$f" bs=1M 2> /dev/null || exit 1 ; done | unpigz -p "${NSLOTS:-1}" | tar x -k
pipe_bailout $LINENO
echo "[$(date)] [SUCCESS] The archive has been extracted."
# ---------------------------------------------------------------------------------------------------
# verification of the extracted files
# ---------------------------------------------------------------------------------------------------
echo "[$(date)] [INFO] Verification of the extracted files ..."
if [[ -r "$INTERNAL_CHECKSUMS" ]] ; then
  if [[ -n $VERBOSE ]] ; then
    echo "[$(date)] [DEBUG] parallel --jobs ${NSLOTS:-1} --halt-on-error 2 \"$HASH_CMD -c --status <<< {}\" :::: $INTERNAL_CHECKSUMS"
  fi
  # Runs from the current working directory (where the files were extracted),
  # one checksum line per parallel job; a failure trips the ERR trap.
  parallel --jobs "${NSLOTS:-1}" --halt-on-error 2 "$HASH_CMD -c --status <<< {}" :::: "$INTERNAL_CHECKSUMS"
  echo "[$(date)] [SUCCESS] The extracted files have been verified."
else
  # This step is about the extracted files, not the input archive.
  echo "[$(date)] [WARNING] No verification of the extracted files, since $INTERNAL_CHECKSUMS is not readable."
fi
# ---------------------------------------------------------------------------------------------------
# final status message
# ---------------------------------------------------------------------------------------------------
# Reached only when no earlier step bailed out.
printf '%s\n' "[$(date)] [SUCCESS] The archive has been extracted and verified."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment