Last active
August 29, 2015 14:00
-
-
Save taroved/11269983 to your computer and use it in GitHub Desktop.
Bash script that silently compresses a directory containing millions of files into a directory structure like /target/directory/2014/12/millions.tar.gz (WARNING: it deletes the source files after archiving!)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# Example: archive the year 2013 of directory /path/to/source/dir/subdir with a 512k speed limit:
#>> ./archive_huge_dir.sh -S /path/to/source/dir -T /path/to/target/dir -L 512k -n subdir -y 2013
# Example: archive December 2013 of directory /path/to/source/dir/subdir with a 512k speed limit:
#>> ./archive_huge_dir.sh -S /path/to/source/dir -T /path/to/target/dir -L 512k -n subdir -y 2013 -m 12
# Example: archive several years with a 512k speed limit, skipping the confirmation prompt (-Y):
#>> for year in `seq 2009 2013`; do ./archive_huge_dir.sh -S /path/to/source/dir -T /path/to/target/dir -L 512k -n subdir -y $year -Y ; done
# Print the current local date and time as "YYYY-MM-DD HH:MM:SS" on stdout.
function curtime { date +'%F %T'; }
# Print all arguments, joined by single spaces, as one line on stdout,
# prefixed with the timestamp produced by curtime.
function echotime {
  # "$*" joins with the first character of IFS; pin it to a space so the
  # output does not depend on the caller's IFS.
  local IFS=' '
  printf '%s %s\n' "$(curtime)" "$*"
}
# Same as echotime, but the timestamped line is written to stderr.
function echoerr { echotime "$@" >&2; }
# Return success if the shell can resolve $1 as a command (via `type`),
# failure otherwise. Prints nothing.
function execs { type "$1" >/dev/null 2>&1; }
# Print the command-line synopsis to stderr and terminate the script with
# exit status 1.
function usage {
  echo "Usage: $0 -S /path/to/source/dir -T /path/to/target/dir -L <optional: rate_limit> -n <subdirectory of /path/to/source/dir> -y <YEAR> -m <optional: month_number> -Y(skip confirm)" 1>&2
  exit 1
}
# Parse command-line options into the globals used by the rest of the script:
#   -S <dir>   SOURCE_DIR  parent of the directory to archive
#   -T <dir>   TARGET_DIR  root under which archives are written
#   -L <rate>  MAX_RATE    optional transfer rate limit handed to pv -L
#   -n <name>  SUBDIR      subdirectory of SOURCE_DIR to archive
#   -y <year>  YEAR        year to archive
#   -m <month> MONTH       optional month number; whole year when omitted
#   -Y         NOCONFIRM   skip the interactive confirmation
# Any other option prints the usage text and exits.
while getopts 'S:T:L:n:y:m:Y' flag; do
  case "${flag}" in
    S) SOURCE_DIR="${OPTARG}" ;;
    T) TARGET_DIR="${OPTARG}" ;;
    L) MAX_RATE="${OPTARG}" ;;
    n) SUBDIR="${OPTARG}" ;;
    y) YEAR="${OPTARG}" ;;
    m) MONTH="${OPTARG}" ;;
    Y) NOCONFIRM=1 ;;
    *) usage ;;
  esac
done
# Validate the required options. Each failure writes its reason to stderr
# (like every other diagnostic in this script) and then calls usage, which
# prints the synopsis and exits with status 1.
if [[ -z "$SOURCE_DIR" ]]; then echo "(S)ource dir is required." >&2; usage; fi
if [[ -z "$TARGET_DIR" ]]; then echo "(T)arget dir is required." >&2; usage; fi
if [[ -z "$SUBDIR" ]]; then echo "Subdirectory (n)ame is required." >&2; usage; fi
if [[ -z "$YEAR" ]]; then echo "(y)ear is required." >&2; usage; fi
# The directory that will be scanned and archived must already exist.
if [[ ! -d "$SOURCE_DIR/$SUBDIR" ]]; then
  echoerr "Directory $SOURCE_DIR/$SUBDIR not exists."
  usage
fi
# Derive the month range and the archive directory from -y / -m:
#   no -m : whole year (months 1..12), archive goes to <target>/<year>
#   -m N  : the single month N,        archive goes to <target>/<year>/<NN>
# START_MONTH / END_MONTH hold plain decimal numbers; the *_MONTH00 variants
# are the same values zero-padded to two digits.
if [[ -z "$MONTH" ]]; then
  START_MONTH=1
  END_MONTH=12
  START_MONTH00=01
  END_MONTH00=12
  ARCHIVE_DIR=$TARGET_DIR/$YEAR
else
  month_re='^(0?[1-9]|1[0-2])$'
  if [[ ! "$MONTH" =~ $month_re ]]; then
    echoerr "Month must be a number from 1 to 12, got '$MONTH'."
    usage
  fi
  # Force base 10: with a leading zero ("08", "09") printf '%02d' and $(( ))
  # would parse the value as octal and fail.
  START_MONTH=$(( 10#$MONTH ))
  END_MONTH=$START_MONTH
  printf -v START_MONTH00 '%02d' "$START_MONTH"
  END_MONTH00=$START_MONTH00
  ARCHIVE_DIR=$TARGET_DIR/$YEAR/$START_MONTH00
fi
# Remember the invocation directory: the temporary files list is created
# there, and later steps refer to it as $CUR_DIR/$LIST_FILE after changing
# into the source directory.
CUR_DIR=$(pwd)
# List file name is made unique per run with the current epoch second.
LIST_FILE=archive_huge_dir.current_files_list_$(date +%s).tmp
ARCHIVE_PATH=$ARCHIVE_DIR/$SUBDIR.tar.gz
# Never overwrite an existing archive. The path is quoted so that target
# directories containing spaces are tested correctly.
if [[ -e "$ARCHIVE_PATH" ]]; then
  echoerr "Archive $ARCHIVE_PATH already exists. Exiting."
  exit 1
fi
#0. Check Pipe Viewer installation
# pv is used in the compression pipeline (size-based progress via -s and the
# optional rate limit via -L), so bail out early when it is missing.
# execs is called directly: wrapping it in a command substitution would run
# it in a subshell and then try to execute whatever it printed.
if ! execs pv; then
  echoerr "Pipe Viewer is not installed."
  exit 1
fi
#1. Define time interval
# The period is the half-open UTC range [START_TIME, END_TIME), both in
# seconds since the epoch: from the first day of START_MONTH up to (but not
# including) the first day of the month that follows END_MONTH.
declare -i START_TIME END_TIME
# Assign separately from `declare` so that a failing `date` (e.g. an invalid
# year) is detected instead of silently yielding 0.
if ! START_TIME=$(date --utc --date="$YEAR-$START_MONTH-01" +%s); then
  echoerr "Cannot compute the start time for year '$YEAR', month '$START_MONTH'."
  exit 1
fi
# 10# forces base 10 so a zero-padded month such as "08" is not read as octal.
if (( 10#$END_MONTH == 12 )); then
  period_end_date="$(( YEAR + 1 ))-01-01"
else
  printf -v MONTH_AFTER_END00 '%02d' "$(( 10#$END_MONTH + 1 ))"
  period_end_date="$YEAR-$MONTH_AFTER_END00-01"
fi
if ! END_TIME=$(date --utc --date="$period_end_date" +%s); then
  echoerr "Cannot compute the end time from date '$period_end_date'."
  exit 1
fi
echotime "Start time: $(date --utc --date=@"$START_TIME" +'%F %T %Z')"
echotime "End time: $(date --utc --date=@"$END_TIME" +'%F %T %Z')"
#2. Create files list (line format: <size><TAB><relative_path>)
# Limitation: the list is line-based, so file names containing a newline are
# not supported.
echotime "Creation of files list for directry '$SOURCE_DIR/$SUBDIR' for $START_MONTH-$END_MONTH monthes of $YEAR year..."
# find emits "<mtime>\t<size>\t<path>"; awk keeps the entries whose mtime lies
# in [START_TIME, END_TIME) and strips the mtime column. Splitting on TAB only
# and printing the remainder of the line keeps paths with spaces intact.
# The redirect is opened before the subshell changes directory, and a failed
# cd aborts instead of letting find scan the wrong directory.
if ! (
  cd -- "$SOURCE_DIR/$SUBDIR" || exit 1
  find . -type f -printf '%T@\t%s\t%p\n' \
    | awk -F '\t' -v start="$START_TIME" -v end="$END_TIME" \
        '$1 >= start && $1 < end { print substr($0, length($1) + 2) }'
) > "$CUR_DIR/$LIST_FILE"; then
  echoerr "Cannot build the files list for $SOURCE_DIR/$SUBDIR."
  rm -f -- "$CUR_DIR/$LIST_FILE"
  exit 1
fi
# Arithmetic expansion strips any padding some wc implementations print.
files_count=$(( $(wc -l < "$CUR_DIR/$LIST_FILE") ))
if (( files_count > 0 )); then
  echotime "Found ${files_count} files."
  echotime "Files list saved into $LIST_FILE"
  if [[ -z "$NOCONFIRM" ]]; then
    read -p "Are you sure? " -n 1 -r
    echo
  fi
  if [[ -n "$NOCONFIRM" || $REPLY =~ ^[Yy]$ ]]; then
    #3. Archive files into single tar.gz file
    # Sum of the size column, used as the expected total for pv's progress.
    # printf "%.0f" avoids exponent notation for large totals in some awks.
    files_size_sum=$(awk -F '\t' '{ sum += $1 } END { printf "%.0f\n", sum }' "$CUR_DIR/$LIST_FILE")
    mkdir -p -- "$ARCHIVE_DIR"
    echotime "Compression started (size sum is $files_size_sum bytes, max rate is $MAX_RATE per second):"
    pv_args=(-s "$files_size_sum")
    if [[ -n "$MAX_RATE" ]]; then
      pv_args+=(-L "$MAX_RATE")
    fi
    # pipefail makes the subshell fail when ANY stage fails (not just gzip),
    # so source files are never deleted after an incomplete tar run.
    # --no-unquote stops tar from interpreting backslashes in listed names;
    # -f - makes "write the archive to stdout" explicit.
    # The output redirect sits outside the subshell so a relative target
    # directory is resolved against the invocation directory.
    if (
      set -o pipefail
      cd -- "$SOURCE_DIR/$SUBDIR" || exit 1
      cut -f 2- < "$CUR_DIR/$LIST_FILE" \
        | tar -c -f - --no-unquote -T - \
        | pv "${pv_args[@]}" \
        | gzip
    ) > "$ARCHIVE_PATH"; then
      #4. Remove archived files if archived successfully
      echotime "Compressed successfully! Archive path: $ARCHIVE_PATH"
      echotime "Remove archived files..."
      # NUL-delimit the names for xargs so whitespace and backslashes in
      # file names survive, and rm is run in batches instead of once per file.
      if (
        cd -- "$SOURCE_DIR/$SUBDIR" || exit 1
        cut -f 2- < "$CUR_DIR/$LIST_FILE" | tr '\n' '\0' | xargs -0 rm -f --
      ); then
        echotime "Archived files removed."
      else
        echoerr "Some archived files could not be removed from $SOURCE_DIR/$SUBDIR."
      fi
    else
      echoerr "Compression failed; source files were left untouched."
      # Drop the incomplete archive so a rerun is not blocked by it.
      rm -f -- "$ARCHIVE_PATH"
    fi
  fi
else
  echoerr "No files for the period."
fi
#5. Remove files list file
# The list file lives in the invocation directory (the script never changes
# directory outside of subshells), so the bare name resolves correctly.
rm -- "$LIST_FILE"
echotime "$LIST_FILE removed"
exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment