Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Bash script that silently compresses a directory containing millions of files into a directory structure like this: /target/directory/2014/12/millions.tar.gz (and deletes the source files!)
#!/bin/bash
# Example: archive the year 2013 of directory /path/to/source/dir/subdir with a 512k rate limit:
#>> ./archive_huge_dir.sh -S /path/to/source/dir -T /path/to/target/dir -L 512k -n subdir -y 2013
# Example: archive December 2013 of directory /path/to/source/dir/subdir with a 512k rate limit:
#>> ./archive_huge_dir.sh -S /path/to/source/dir -T /path/to/target/dir -L 512k -n subdir -y 2013 -m 12
# Example: archive several years with a 512k rate limit, skipping the confirmation prompt:
#>> for year in `seq 2009 2013`; do ./archive_huge_dir.sh -S /path/to/source/dir -T /path/to/target/dir -L 512k -n subdir -y $year -Y ; done
# Print the current local date and time as "YYYY-MM-DD HH:MM:SS".
curtime() {
  date +'%Y-%m-%d %H:%M:%S'
}
# Print all arguments on one line, prefixed with the timestamp from curtime.
echotime() {
  local stamp
  stamp=$(curtime)
  # "$*" joins the arguments with single spaces, as echo does.
  printf '%s %s\n' "$stamp" "$*"
}
# Timestamped message, like echotime, but written to stderr.
echoerr() {
  echotime "$@" >&2
}
# Return success when the name in $1 is known to the shell (as reported by
# the `type` builtin); all output is discarded.
execs() {
  type "$1" &> /dev/null
}
# Print the command-line synopsis to stderr and terminate with status 1.
usage() {
  printf '%s\n' "Usage: $0 -S /path/to/source/dir -T /path/to/target/dir -L <optional: rate_limit> -n <subdirectory of /path/to/source/dir> -y <YEAR> -m <optional: month_number> -Y(skip confirm)" >&2
  exit 1
}
# Parse the command line into script-level settings:
#   -S source dir, -T target dir, -L pv rate limit, -n subdirectory name,
#   -y year, -m month, -Y skip the confirmation prompt.
# Anything else prints the usage text and exits.
while getopts 'S:T:L:n:y:m:Y' opt; do
  case "$opt" in
    S) SOURCE_DIR=$OPTARG ;;
    T) TARGET_DIR=$OPTARG ;;
    L) MAX_RATE=$OPTARG ;;
    n) SUBDIR=$OPTARG ;;
    y) YEAR=$OPTARG ;;
    m) MONTH=$OPTARG ;;
    Y) NOCONFIRM=1 ;;
    *) usage ;;
  esac
done
# Every run needs a source dir, a target dir, a subdirectory name and a year;
# report the first missing one and show the usage text.
if [[ -z "$SOURCE_DIR" ]]; then
  echo "(S)ource dir is required."
  usage
fi
if [[ -z "$TARGET_DIR" ]]; then
  echo "(T)arget dir is required."
  usage
fi
if [[ -z "$SUBDIR" ]]; then
  echo "Subdirectory (n)ame is required."
  usage
fi
if [[ -z "$YEAR" ]]; then
  echo "(y)ear is required."
  usage
fi
# The directory to archive must already exist.
if [[ ! -d "$SOURCE_DIR/$SUBDIR" ]]; then
  echoerr "Directory $SOURCE_DIR/$SUBDIR not exists."
  usage
fi
# Decide which months to archive and where the archive goes:
#   without -m : the whole year  -> $TARGET_DIR/$YEAR
#   with -m N  : one month only  -> $TARGET_DIR/$YEAR/NN
# START_MONTH/END_MONTH are plain decimal numbers; the *_MONTH00 variants are
# the same values zero-padded to two digits.
if [[ -z "$MONTH" ]]; then
  START_MONTH=1
  END_MONTH=12
  ARCHIVE_DIR=$TARGET_DIR/$YEAR
else
  # Reject anything that is not a month number. Month 0 in particular would
  # produce an invalid start date but a valid end date further down.
  if [[ ! "$MONTH" =~ ^[0-9]+$ ]] || (( 10#$MONTH < 1 || 10#$MONTH > 12 )); then
    echo "(m)onth must be a number from 1 to 12."
    usage
  fi
  # 10# forces base 10: bash arithmetic and printf '%d' treat a leading zero
  # as octal, so "08" and "09" would otherwise be rejected as invalid numbers.
  START_MONTH=$(( 10#$MONTH ))
  END_MONTH=$START_MONTH
  ARCHIVE_DIR=$TARGET_DIR/$YEAR/$(printf '%02d' "$START_MONTH")
fi
START_MONTH00=$(printf '%02d' "$START_MONTH")
END_MONTH00=$(printf '%02d' "$END_MONTH")
# Remember the directory the script was started from; the temporary files
# list is created there and referenced as $CUR_DIR/$LIST_FILE.
CUR_DIR=$(pwd)
LIST_FILE=archive_huge_dir.current_files_list_$(date +%s).tmp
ARCHIVE_PATH=$ARCHIVE_DIR/$SUBDIR.tar.gz
# Never overwrite an existing archive. The path is quoted so the test still
# works when it contains spaces; unquoted, `[ -e a b ]` fails with
# "too many arguments" and the guard is silently skipped.
if [[ -e "$ARCHIVE_PATH" ]]; then
  echoerr "Archive $ARCHIVE_PATH already exists. Exiting."
  exit 1
fi
#0. Check Pipe Viewer installation
# pv drives the progress display and the optional rate limit during
# compression, so stop early when it is missing.
if ! execs pv; then
  echoerr "Pipe Viewer is not installed."
  exit 1
fi
#1. Define time interval
# The interval is half-open, [START_TIME, END_TIME), in epoch seconds (UTC):
# from the first day of START_MONTH up to the first day of the month that
# follows END_MONTH.
declare -i START_TIME END_TIME
# A failing `date` must abort the run. Otherwise the bound silently becomes 0
# and the selection (and later deletion) could cover a far wider period.
if ! START_TIME=$(date --utc --date="$YEAR-$START_MONTH-01" +%s); then
  echoerr "Cannot compute start time from year '$YEAR' and month '$START_MONTH'."
  exit 1
fi
# 10# forces base 10 so a zero-padded month such as "08" is not read as octal.
if (( 10#$END_MONTH == 12 )); then
  end_date="$(( YEAR + 1 ))-01-01"
else
  MONTH_AFTER_END00=$(printf '%02d' "$(( 10#$END_MONTH + 1 ))")
  end_date="$YEAR-$MONTH_AFTER_END00-01"
fi
if ! END_TIME=$(date --utc --date="$end_date" +%s); then
  echoerr "Cannot compute end time from year '$YEAR' and month '$END_MONTH'."
  exit 1
fi
echotime "Start time: $(date --utc --date=@"$START_TIME" +'%F %T %Z')"
echotime "End time: $(date --utc --date=@"$END_TIME" +'%F %T %Z')"
#2. Create files list (line format: <size><TAB><relative_path>)
echotime "Creation of files list for directry '$SOURCE_DIR/$SUBDIR' for $START_MONTH-$END_MONTH monthes of $YEAR year..."
# find prints "<mtime>\t<size>\t<path>" for every regular file. awk keeps the
# rows whose mtime lies in [START_TIME, END_TIME) and strips the mtime column.
#  - Fields are split on TAB only, so paths containing spaces stay intact.
#  - The bounds are passed with -v instead of being spliced into the program.
#  - A failed cd aborts the subshell rather than listing the wrong directory.
(
  cd "$SOURCE_DIR/$SUBDIR" || exit 1
  find . -type f -printf '%T@\t%s\t%p\n' \
    | awk -F '\t' -v start="$START_TIME" -v end="$END_TIME" \
        '($1 + 0) >= (start + 0) && ($1 + 0) < (end + 0) { sub(/^[^\t]*\t/, ""); print }'
) > "$CUR_DIR/$LIST_FILE"
# Number of listed files; kept as an array because it is read as
# ${files_count[0]}.
files_count=( $(wc -l < "$CUR_DIR/$LIST_FILE") )
# Archive the listed files and, only when the whole pipeline succeeded,
# delete them from the source directory.
if (( ${files_count[0]:-0} > 0 )); then
  echotime "Found ${files_count[0]} files."
  echotime "Files list saved into $LIST_FILE"
  if [[ -z "$NOCONFIRM" ]]; then
    read -p "Are you sure? " -n 1 -r
    echo
  fi
  if [[ -n "$NOCONFIRM" || $REPLY =~ ^[Yy]$ ]]; then
    #3. Archive files into single tar.gz file
    # printf "%.0f" keeps the byte total a plain integer; a bare `print` may
    # switch to exponent notation for large sums on some awk implementations,
    # which pv -s cannot use.
    files_size_sum=$(awk -F '\t' '{ sum += $1 } END { printf "%.0f\n", sum }' "$CUR_DIR/$LIST_FILE")
    mkdir -p "$ARCHIVE_DIR"
    echotime "Compression started (size sum is $files_size_sum bytes, max rate is $MAX_RATE per second):"
    pv_opts=(-s "$files_size_sum")
    if [[ -n "$MAX_RATE" ]]; then
      pv_opts+=(-L "$MAX_RATE")
    fi
    # pipefail makes the subshell fail when ANY stage fails (tar in
    # particular); without it only gzip's status is seen and the source files
    # could be deleted after an incomplete archive was written.
    # The output redirection is outside the subshell so a relative target
    # path resolves against the start directory, like mkdir -p above.
    if (
      set -o pipefail
      cd "$SOURCE_DIR/$SUBDIR" || exit 1
      cut -f 2- "$CUR_DIR/$LIST_FILE" \
        | tar -c -T - \
        | pv "${pv_opts[@]}" \
        | gzip
    ) > "$ARCHIVE_PATH"; then
      #4. Remove archived files if archived successfully
      echotime "Compressed successfully! Archive path: $ARCHIVE_PATH"
      echotime "Remove archived files..."
      (
        cd "$SOURCE_DIR/$SUBDIR" || exit 1
        # IFS= and -r keep leading/trailing blanks and backslashes in names.
        cut -f 2- "$CUR_DIR/$LIST_FILE" \
          | while IFS= read -r file; do rm -f -- "$file"; done
      )
      echotime "Archived files removed."
    else
      echoerr "Compression failed. Source files were NOT removed; check $ARCHIVE_PATH."
    fi
  fi
else
  echoerr "No files for the period."
fi
#5. Remove files list file
# The list is a temporary work file in the start directory; drop it and
# finish with a success status.
rm "${LIST_FILE}"
echotime "${LIST_FILE} removed"
exit 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment