Skip to content

Instantly share code, notes, and snippets.

@taroved
Last active August 29, 2015 13:59
Show Gist options
  • Save taroved/10996793 to your computer and use it in GitHub Desktop.
Save taroved/10996793 to your computer and use it in GitHub Desktop.
Obsolete! Use https://gist.github.com/taroved/11269983 instead. (Bash script which silently compresses a directory with millions of files into a directory structure like this: /target/directory/2014/12/millions.tar.gz (and deletes the source files!))
#!/bin/bash
#Example: archive the year 2013 of directory /path/to/source/dir/millions_files_subdir with a 512k speed limit:
#>> archive_millions_files_dir.sh 512k millions_files_subdir 2013
#Example: archive December 2013 of directory /path/to/source/dir/millions_files_subdir with a 512k speed limit:
#>> archive_millions_files_dir.sh 512k millions_files_subdir 2013 12
#Example: archive several years with a 512k speed limit:
#>> for year in `seq 2010 2013`; do for subdir in `ls /path/to/source/dir`; do archive_millions_files_dir.sh 512k $subdir $year; done; done
# Root directory that contains the subdirectories to be archived.
SOURCE_DIR="/path/to/source/dir"
# Root directory under which <YEAR>[/<MONTH>]/<subdir>.tar.gz archives are created.
TARGET_DIR="/path/to/target/dir"
# Print the current local date and time as "YYYY-MM-DD HH:MM:SS".
# Used as the timestamp prefix of the script's log lines.
curtime() {
  date '+%Y-%m-%d %H:%M:%S'
}
# Print the command-line synopsis to stderr and terminate the script with status 1.
usage() {
  printf '%s\n' "Usage: $0 <rate_limit> <subdirectory of $SOURCE_DIR> <YEAR> <optional: month_number>" >&2
  exit 1
}
# --- Command-line validation and derived paths ---
# Positional arguments:
#   $1  rate limit handed to `pv -L` (e.g. 512k)
#   $2  subdirectory of $SOURCE_DIR to archive
#   $3  year to archive (digits only)
#   $4  optional month number 1-12 (a leading zero such as 08 is accepted);
#       when omitted the whole year is archived
# Every validation failure ends in usage(), which exits with status 1.
[ -z "$1" ] && usage
MAX_RATE=$1

# An empty $2 must be rejected explicitly: "$SOURCE_DIR/" is itself a
# directory, so the -d test alone would let the script run with no subdirectory.
if [ -z "$2" ] || [ ! -d "$SOURCE_DIR/$2" ]
then
  echo "$(curtime) Directory $SOURCE_DIR/$2 not exists."
  usage
fi
SUBDIR=$2

if [ -z "$3" ]
then
  echo "Year is required."
  usage
fi
# The year is later used in shell arithmetic and in date strings.
if ! [[ $3 =~ ^[0-9]+$ ]]
then
  echo "Year must be a number."
  usage
fi
YEAR=$3

if [ -z "$4" ]
then
  # No month given: cover January through December of the year.
  START_MONTH=1
  END_MONTH=12
else
  if ! [[ $4 =~ ^[0-9]{1,2}$ ]] || (( 10#$4 < 1 || 10#$4 > 12 ))
  then
    echo "Month must be a number from 1 to 12."
    usage
  fi
  # 10# forces base 10, otherwise "08" and "09" are rejected as invalid octal
  # by both printf '%02d' and $(( )) arithmetic.
  START_MONTH=$(( 10#$4 ))
  END_MONTH=$START_MONTH
fi
START_MONTH00=$(printf '%02d' "$START_MONTH")
END_MONTH00=$(printf '%02d' "$END_MONTH")

# Whole-year archives go to <target>/<year>, single-month ones to <target>/<year>/<MM>.
if [ -z "$4" ]
then
  ARCHIVE_DIR=$TARGET_DIR/$YEAR
else
  ARCHIVE_DIR=$TARGET_DIR/$YEAR/$START_MONTH00
fi

# The files list is written into the directory the script was started from.
CUR_DIR=$(pwd)
LIST_FILE=archive_millions_files_dir.current_files_list_$(date +%s).tmp
ARCHIVE_PATH=$ARCHIVE_DIR/$SUBDIR.tar.gz

# Never overwrite an archive produced by an earlier run.
if [ -e "$ARCHIVE_PATH" ]
then
  echo "Archive $ARCHIVE_PATH already exists. Exiting."
  exit 1
fi
#0. Check Pipe Viewer installation
# `command -v` checks that a runnable `pv` is actually on PATH. Unlike
# `dpkg-query -W pv` it works on non-Debian systems, detects a pv installed
# outside the package manager, and prints nothing when pv is present.
if ! command -v pv >/dev/null 2>&1
then
  echo "Pipe Viewer not installed. To install Pipe Viewer 'sudo apt-get install pv'"
  exit 1
fi
#1. Define time interval
# Computes the half-open interval [START_TIME, END_TIME) in epoch seconds:
# from 00:00 UTC on the first day of START_MONTH up to 00:00 UTC on the first
# day of the month that follows END_MONTH (January of YEAR+1 after December).
declare -i START_TIME END_TIME
# 10# forces base 10 so that month values written as 08 or 09 are not
# rejected as invalid octal numbers by shell arithmetic.
START_DATE="$YEAR-$(printf '%02d' "$(( 10#$START_MONTH ))")-01"
if (( 10#$END_MONTH == 12 ))
then
  END_DATE="$(( YEAR + 1 ))-01-01"
else
  MONTH_AFTER_END00=$(printf '%02d' "$(( 10#$END_MONTH + 1 ))")
  END_DATE="$YEAR-$MONTH_AFTER_END00-01"
fi
# Assign separately from `declare` so a failing `date` is detected instead of
# silently leaving a zero timestamp.
if ! START_TIME=$(date --utc --date="$START_DATE" +%s) \
  || ! END_TIME=$(date --utc --date="$END_DATE" +%s)
then
  echo "$(curtime) Cannot compute time interval from '$START_DATE' to '$END_DATE'." >&2
  exit 1
fi
echo "$(curtime) Start time: $(date --utc --date=@"$START_TIME")"
echo "$(curtime) End time: $(date --utc --date=@"$END_TIME")"
#2. Create files list (line format: <time> <size> <name>)
echo "$(curtime) Creation of files list for directry '$SOURCE_DIR/$SUBDIR' for $START_MONTH-$END_MONTH monthes of $YEAR year..."
#heavy: find $SOURCE_DIR/$SUBDIR -type f -printf '%T@\t%s\t%f\n' \
# `ls -U` lists without sorting; with --time-style=+%s field 6 is the mtime in
# epoch seconds, field 5 the size in bytes and field 7 the name.
# NOTE: only field 7 is kept, so a name containing whitespace is truncated.
# The interval bounds are passed with -v instead of being spliced into the awk
# program; NF >= 7 skips the leading "total" line.
# `cd ... &&` keeps ls from listing the wrong directory when cd fails.
if ! (cd "$SOURCE_DIR/$SUBDIR" \
  && nice -n 19 ls -Ul --time-style=+%s --block-size=1 \
    | awk -v start="$START_TIME" -v end="$END_TIME" \
        'NF >= 7 && $6 + 0 >= start + 0 && $6 + 0 < end + 0 { print $6 "\t" $5 "\t" $7 }' \
        > "$CUR_DIR/$LIST_FILE")
then
  echo "$(curtime) Cannot create files list for '$SOURCE_DIR/$SUBDIR'." >&2
  rm -f -- "$CUR_DIR/$LIST_FILE"
  exit 1
fi

exit_status=0
files_count=$(wc -l < "$CUR_DIR/$LIST_FILE")
if [ "$files_count" -gt 0 ]
then
  echo "$(curtime) Found $files_count files."
  echo "$(curtime) Files list saved into $LIST_FILE"

  #3. Archive files into single tar.gz file
  # printf "%.0f" keeps large byte totals in plain integer notation on awk
  # implementations that would otherwise print them in exponent form.
  files_size_sum=$(awk '{ total += $2 } END { printf "%.0f\n", total }' "$CUR_DIR/$LIST_FILE")
  if ! mkdir -p "$ARCHIVE_DIR"
  then
    echo "$(curtime) Cannot create archive directory $ARCHIVE_DIR." >&2
    rm -f -- "$CUR_DIR/$LIST_FILE"
    exit 1
  fi
  echo "$(curtime) Compression started (size sum is $files_size_sum bytes, max rate is $MAX_RATE per second):"
  # pipefail makes the subshell fail when ANY stage fails (tar, pv or gzip),
  # not only the last one; without it a tar error would still be followed by
  # the deletion of the source files below.
  (set -o pipefail
    cd "$SOURCE_DIR" \
    && awk -v prefix="$SUBDIR/" '{ print prefix $3 }' "$CUR_DIR/$LIST_FILE" \
      | nice -n 19 tar -c -T - \
      | pv -s "$files_size_sum" -L "$MAX_RATE" \
      | nice -n 19 gzip > "$ARCHIVE_PATH")
  compress_status=$?

  #4. Remove archived files if archived successfully
  if [ "$compress_status" -eq 0 ]
  then
    echo "$(curtime) Compressed successfully! Archive path: $ARCHIVE_PATH"
    echo "$(curtime) Remove archived files..."
    # xargs batches many names into each rm call instead of one rm per file;
    # -d '\n' takes every line literally, -r skips rm on empty input.
    (cd "$SOURCE_DIR" \
      && awk -v prefix="$SUBDIR/" '{ print prefix $3 }' "$CUR_DIR/$LIST_FILE" \
        | nice -n 19 xargs -r -d '\n' rm -f --)
    echo "$(curtime) Archived files removed."
  else
    echo "$(curtime) Compression failed (status $compress_status). Source files were NOT removed; incomplete archive left at $ARCHIVE_PATH" >&2
    exit_status=1
  fi
else
  echo "$(curtime) No files for the period."
fi

#5. Remove files list file
rm -f -- "$CUR_DIR/$LIST_FILE"
echo "$(curtime) $LIST_FILE removed"
exit "$exit_status"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment