Last active
August 29, 2015 13:59
-
-
Save taroved/10996793 to your computer and use it in GitHub Desktop.
Obsolete! Use https://gist.github.com/taroved/11269983 instead. (Bash script that silently compresses a directory with millions of files into a directory structure like this: /target/directory/2014/12/millions.tar.gz, and deletes the source files!)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Example: archive the year 2013 of directory /path/to/source/dir/millions_files_subdir with a 512k speed limit:
#>> archive_millions_files_dir.sh 512k millions_files_subdir 2013
# Example: archive December 2013 of directory /path/to/source/dir/millions_files_subdir with a 512k speed limit:
#>> archive_millions_files_dir.sh 512k millions_files_subdir 2013 12
# Example: archive several years with a 512k speed limit:
#>> for year in `seq 2010 2013`; do for subdir in `ls /path/to/source/dir`; do archive_millions_files_dir.sh 512k $subdir $year; done; done
# Root directory holding the subdirectories that can be archived; the second
# command-line argument names a subdirectory of it.
SOURCE_DIR=/path/to/source/dir
# Root directory for the archives; they are written below it as
# <YEAR>/<subdir>.tar.gz or <YEAR>/<MM>/<subdir>.tar.gz.
TARGET_DIR=/path/to/target/dir
# Print the current local date and time as "YYYY-MM-DD HH:MM:SS".
# Used as the prefix of the script's log lines.
curtime() {
  date +"%F %T"
}
# Print the command-line synopsis to stderr and terminate the script with
# exit status 1. Never returns to the caller.
usage() {
  echo "Usage: $0 <rate_limit> <subdirectory of $SOURCE_DIR> <YEAR> <optional: month_number>" 1>&2
  exit 1
}
# --- Validate the positional arguments ---------------------------------------
# Error messages go to stderr, like the synopsis printed by usage().

# $1: rate limit, later handed to `pv -L` (e.g. 512k).
if [ -z "$1" ]; then
  usage
fi
MAX_RATE=$1

# $2: subdirectory of $SOURCE_DIR to archive.
# The emptiness check is essential: with an empty $2 the path "$SOURCE_DIR/"
# is an existing directory, so the script would go on to archive and delete
# files directly inside the source root.
if [ -z "$2" ] || [ ! -d "$SOURCE_DIR/$2" ]; then
  echo "$(curtime) Directory $SOURCE_DIR/$2 not exists." >&2
  usage
fi
SUBDIR=$2

# $3: year. It is later used in arithmetic and in a date string, so anything
# other than plain digits is rejected up front.
if [ -z "$3" ]; then
  echo "Year is required." >&2
  usage
fi
case "$3" in
  *[!0-9]*)
    echo "Year must be a number, got '$3'." >&2
    usage
    ;;
esac
YEAR=$3
# Convert a month argument such as "8", "08" or "12" to a plain decimal number.
# Base 10 is forced explicitly because bash arithmetic and printf '%d' treat a
# leading zero as octal, which makes "08" and "09" invalid numbers.
# Arguments: $1 - month as typed by the user
# Outputs:   the month without leading zero on stdout
# Returns:   0 on success, 1 if $1 is not a month number in 1..12
month_to_int() {
  local raw=$1
  [[ "$raw" =~ ^[0-9]{1,2}$ ]] || return 1
  local month=$(( 10#$raw ))
  (( month >= 1 && month <= 12 )) || return 1
  printf '%d\n' "$month"
}

# --- Select the month range and the archive directory ------------------------
if [ -z "$4" ]; then
  # No month given: archive the whole year into $TARGET_DIR/<YEAR>.
  START_MONTH=1
  END_MONTH=12
  START_MONTH00=$(printf '%02d' "$START_MONTH")
  END_MONTH00=$(printf '%02d' "$END_MONTH")
  ARCHIVE_DIR=$TARGET_DIR/$YEAR
else
  # One month given: archive only that month into $TARGET_DIR/<YEAR>/<MM>.
  if ! START_MONTH=$(month_to_int "$4"); then
    echo "Month must be a number from 1 to 12, got '$4'." >&2
    usage
  fi
  END_MONTH=$START_MONTH
  START_MONTH00=$(printf '%02d' "$START_MONTH")
  END_MONTH00=$(printf '%02d' "$END_MONTH")
  ARCHIVE_DIR=$TARGET_DIR/$YEAR/$START_MONTH00
fi
# Directory the script was started from; the temporary list file is created here.
CUR_DIR=$(pwd)
# Name of the temporary list file, relative to $CUR_DIR. The epoch suffix keeps
# runs started in different seconds from sharing a file.
LIST_FILE=archive_millions_files_dir.current_files_list_$(date +%s).tmp
ARCHIVE_PATH=$ARCHIVE_DIR/$SUBDIR.tar.gz

# Refuse to overwrite an existing archive. The path must be quoted: unquoted,
# a path containing whitespace makes `[` fail with a syntax error, which counts
# as "does not exist" and lets the script overwrite the archive.
if [ -e "$ARCHIVE_PATH" ]; then
  echo "Archive $ARCHIVE_PATH already exists. Exiting." >&2
  exit 1
fi
#0. Check Pipe Viewer installation
# `command -v` looks pv up in PATH, so it works however pv was installed and on
# systems without dpkg; it also prints nothing on success.
if ! command -v pv >/dev/null 2>&1; then
  echo "Pipe Viewer not installed. To install Pipe Viewer 'sudo apt-get install pv'" >&2
  exit 1
fi
#1. Define time interval
# The interval is half-open, [START_TIME, END_TIME), in Unix timestamps:
# START_TIME is 00:00 UTC on the first day of START_MONTH, END_TIME is 00:00 UTC
# on the first day of the month that follows END_MONTH.
declare -i START_TIME END_TIME

if [ "$END_MONTH" -eq 12 ]; then
  # December rolls over into January of the next year.
  interval_end_date="$(( 10#$YEAR + 1 ))-01-01"
else
  # 10# forces base 10: a zero-padded month such as "08" would otherwise be
  # rejected by the arithmetic expansion as an invalid octal number.
  MONTH_AFTER_END00=$(printf '%02d' "$(( 10#$END_MONTH + 1 ))")
  interval_end_date="$YEAR-$MONTH_AFTER_END00-01"
fi

# Assign separately from `declare` so that a failing `date` is noticed; when it
# was masked, both bounds silently became 0 and no file ever matched.
if ! START_TIME=$(date --utc --date="$YEAR-$START_MONTH-01" +%s) \
  || ! END_TIME=$(date --utc --date="$interval_end_date" +%s); then
  echo "$(curtime) Cannot compute the time interval for year '$YEAR', months '$START_MONTH'-'$END_MONTH'." >&2
  exit 1
fi

echo "$(curtime) Start time: $(date --utc --date=@"$START_TIME")"
echo "$(curtime) End time: $(date --utc --date=@"$END_TIME")"
# Print one archive member path ("<SUBDIR>/<name>") per entry of the list file.
# The name is everything after the second tab, so names containing spaces or
# tabs stay intact. SUBDIR is passed through the environment instead of being
# spliced into a sed/awk program, so special characters in it are harmless.
list_member_paths() {
  cut -f3- "$CUR_DIR/$LIST_FILE" \
    | SUBDIR_PREFIX=$SUBDIR awk '{ print ENVIRON["SUBDIR_PREFIX"] "/" $0 }'
}

#2. Create files list (line format: <time>\t<size>\t<name>)
echo "$(curtime) Creation of files list for directry '$SOURCE_DIR/$SUBDIR' for $START_MONTH-$END_MONTH monthes of $YEAR year..."
list_path=$CUR_DIR/$LIST_FILE
#heavy: find $SOURCE_DIR/$SUBDIR -type f -printf '%T@\t%s\t%f\n' \
# `ls -l` columns: 1 mode, 2 links, 3 owner, 4 group, 5 size, 6 mtime, 7.. name.
(
  # Without this guard a failed cd would make ls list the current directory.
  cd "$SOURCE_DIR/$SUBDIR" || exit 1
  nice -n 19 ls -Ul --time-style=+%s --block-size=1 \
    | awk -v from="$START_TIME" -v until="$END_TIME" '
        # Keep regular files only (mode starts with "-"); a listed directory
        # would be archived recursively and could not be removed afterwards.
        substr($1, 1, 1) == "-" && $6 >= from && $6 < until {
          # Rebuild the full name: drop the first six columns and the single
          # separating space instead of using $7, which stops at the first space.
          name = $0
          for (i = 1; i <= 6; i++) sub(/^[ \t]*[^ \t]+/, "", name)
          sub(/^ /, "", name)
          print $6 "\t" $5 "\t" name
        }' > "$list_path"
) || {
  echo "$(curtime) Cannot create files list for '$SOURCE_DIR/$SUBDIR'." >&2
  rm -f -- "$list_path"
  exit 1
}

files_count=$(wc -l < "$list_path")
if [ "$files_count" -gt 0 ]; then
  echo "$(curtime) Found $files_count files."
  echo "$(curtime) Files list saved into $LIST_FILE"

  #3. Archive files into single tar.gz file
  # printf "%.0f" keeps the byte count a plain integer; some awk implementations
  # print large sums in scientific notation, which pv -s cannot use.
  files_size_sum=$(awk -F'\t' '{ sum += $2 } END { printf "%.0f\n", sum }' "$list_path")
  mkdir -p "$ARCHIVE_DIR"
  echo "$(curtime) Compression started (size sum is $files_size_sum bytes, max rate is $MAX_RATE per second):"
  (
    # pipefail: the status must reflect tar and pv too, not only gzip. The
    # source files are deleted on success, so a failed tar must not look like one.
    set -o pipefail
    cd "$SOURCE_DIR" || exit 1
    list_member_paths \
      | nice -n 19 tar -c -T - \
      | pv -s "$files_size_sum" -L "$MAX_RATE" \
      | nice -n 19 gzip > "$ARCHIVE_PATH"
  )
  archive_status=$?

  #4. Remove archived files if archived successfully
  if [ "$archive_status" -eq 0 ]; then
    echo "$(curtime) Compressed successfully! Archive path: $ARCHIVE_PATH"
    echo "$(curtime) Remove archived files..."
    (
      cd "$SOURCE_DIR" || exit 1
      # xargs batches many names per rm call instead of one process per file;
      # -d '\n' takes each line verbatim as one name.
      list_member_paths | nice -n 19 xargs -d '\n' rm -f --
    )
    echo "$(curtime) Archived files removed."
  else
    # Keep the sources. Drop the incomplete archive so that a retry is not
    # refused with "already exists", and drop the list file because the script
    # stops here with a failure status.
    echo "$(curtime) Compression failed (status $archive_status); source files were NOT removed." >&2
    rm -f -- "$ARCHIVE_PATH" "$list_path"
    exit 1
  fi
else
  echo "$(curtime) No files for the period."
fi
#5. Remove files list file
# The name is relative to the directory the script was started from; the script
# only changes directory inside subshells, so it still resolves here.
rm -- "$LIST_FILE"
echo "$(curtime) $LIST_FILE removed"
exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment