Last active
April 19, 2020 09:32
-
-
Save jcf/f371d43033317b5908809cbabed9d752 to your computer and use it in GitHub Desktop.
Parallel rsync script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env zsh
#
# Parallel rsync script.
#
# Usage:
#
#   sync.sh
#
# -e: abort as soon as a command fails; -x: trace every command as it runs.
set -ex

# Number of rsync processes to run
n=10

# Remote host address
remote="192.168.1.50"

# Local path to replicate remote file structure
dest="/mnt/pool/import/"
# List files and their sizes so we can plan our parallel sync.
#
# Arguments: passed straight through to rsync (source, filter rules,
#            destination).
# Outputs:   the lines rsync prints for the dry run, each formatted as
#            "<length> <name>", sorted numerically with the largest first.
get-manifest() {
  # --dry-run:     report what would be transferred without copying anything.
  # --out-format:  "%l %n" prints the file length followed by its name.
  # --no-v:        switches the -v from -avzm back off so that only the
  #                --out-format lines are printed.
  # "$@" is quoted so each caller argument reaches rsync as exactly one word
  # (unquoted, empty arguments are dropped and, outside zsh, words are split).
  rsync -avzm \
    --safe-links \
    --ignore-existing \
    --out-format="%l %n" \
    --no-v \
    --dry-run "$@" \
    | sort -n -r
}
# Use a temporary directory, and clean up on exit. Disabled because
# it's easier to debug with a fixed directory.
# tmpdir=$(mktemp -d)
# trap 'rm -rf -- "$tmpdir"' EXIT

# Hardcoded tmp dir. The manifest files are written here, and the script
# changes into it.
tmpdir="/tmp/sync"
mkdir -p -- "$tmpdir"
cd -- "$tmpdir"
# Change the "/Volumes/" path to the common parent of all your sync directories,
# and make sure to include the directories you need.
#
# Below, I include everything in /Volumes/Seven/Movies, /Volumes/Seven/Software,
# and /Volumes/Three/iTunes. Everything else will be excluded because of the
# catch-all `--exclude="*"` rule, which is applied to anything that does not
# match earlier in the list of inclusions/exclusions. The order of the filter
# rules therefore matters; keep `--exclude='*'` last.
#
# The resulting "<length> <name>" listing is saved as manifest.txt in $tmpdir.
# ${remote} is braced so the following ":" can never be read as a zsh
# parameter-expansion modifier.
get-manifest \
  "${remote}:/Volumes/" \
  --exclude='.*' \
  --include='Seven/Movies/***' \
  --include='Seven/Software/***' \
  --include='Three/iTunes/***' \
  --include='*/' \
  --exclude='*' \
  "$dest" > "$tmpdir/manifest.txt"
# Split the manifest of files into multiple files in a round-robin fashion
# so we share out the big files, and saturate the network/disk.
#
# We'll end with a manifest.txt file, and several manifest-XX.txt files.

# $tmpdir is a fixed directory that is not cleaned between runs, so remove
# split files left over from an earlier run first; otherwise a previous run
# with a larger $n would leave extra manifest-XX.txt files behind that would
# be synced again.
find "$tmpdir" -type f -name 'manifest-*.txt' -delete

# Drop the leading size column (field 1), keeping only the path, then deal
# the lines out across $n files. The output prefix is an explicit path so the
# result does not depend on the current working directory.
cut -d' ' -f 2- "$tmpdir/manifest.txt" \
  | split --number="r/$n" --additional-suffix=.txt - "$tmpdir/manifest-"

ls -al "$tmpdir"
# This finds each split file, and farms them out to rsync via GNU parallel.
#
# parallel starts up to $n rsync jobs at a time, one per manifest-XX.txt file;
# it substitutes {} with the path of the split file, which rsync then reads
# as its --files-from list. The placeholder is quoted so the shell passes it
# through to parallel untouched, and ${remote} is braced so the following ":"
# can never be read as a zsh parameter-expansion modifier.
find "$tmpdir" -type f -name 'manifest-*.txt' \
  | parallel \
      --will-cite \
      -j "$n" \
      -t \
      --verbose \
      --progress \
      rsync \
        -avzm \
        --prune-empty-dirs \
        '--files-from={}' \
        "${remote}:/Volumes/" \
        "$dest"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment