Last active
May 10, 2024 10:39
-
-
Save joecorall/d3c5b00b74fa80bfc85920ccfb63aac9 to your computer and use it in GitHub Desktop.
rsync a production server to staging. This script is meant to run from the production server to make a copy to staging in the fastest way possible. Used on an Islandora OCFL fcrepo filesystem and a drupal filesystem totaling ~7TB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# rsync a production server to staging. Meant to run FROM the production
# server to copy an Islandora OCFL fcrepo filesystem plus a drupal
# filesystem (~7TB) to staging as fast as possible, by fanning out
# rsync jobs over the deep OCFL directory tree with GNU parallel.
#
# Fix: was `set -eou pipefail` — a typo; `-o` consumes the next chars as
# its option name, so pipefail was never enabled. Correct order is -euo.
set -euo pipefail

# the source and target directory we want to rsync to another server
# this script assumes it's the same path on both servers
DIR=/opt/islandora/volumes

# where to send the files to
# replace USER and SERVER with your staging user and server domain
DESTINATION="USER@SERVER:${DIR}/"

# how many directories deep to scan
# you want to set this to the deepest directory that contains the most sub directories
# so we have a long list of directories we can run in parallel with rsync
# 5 means we'd have a list like fcrepo-data/home/data/ocfl-root/a6e/be5
MAX_DEPTH=5

# quote $DIR and send the diagnostic to stderr
cd "$DIR" || { echo "Failed to change directory to $DIR" >&2; exit 1; }

echo "$(date) Starting sync"

job_ids=()

# these two drupal folders are not OCFL and are mostly derivatives
# so just a standard recursive rsync in the background should be fast enough
# (dropped --progress: it is suppressed by -q anyway and useless non-interactively)
rsync -azq --relative --rsync-path=srsync "drupal-private-files" "$DESTINATION" &
job_ids+=($!)
rsync -azq --relative --rsync-path=srsync "drupal-public-files" "$DESTINATION" &
job_ids+=($!)

OCFL_DIR="fcrepo-data"

# traverse the directory tree with a depth first search
for (( DEPTH = MAX_DEPTH; DEPTH >= 1; DEPTH-- )); do
  # make sure we find and sync all the directories at MAX_DEPTH
  # so we rsync anything with a greater depth
  if (( DEPTH == MAX_DEPTH )); then
    echo "$(date) Syncing directories at depth >= $DEPTH"
    find "$OCFL_DIR" -mindepth "$DEPTH" -maxdepth "$DEPTH" -type d \
      | parallel -v -j8 rsync -azq --relative --rsync-path=srsync "{}" "$DESTINATION"
    continue
  fi

  # now the max depth has been synced
  # we'll rsync any files that exist at the given depth
  # making sure we expand the -a flag without the -r option so we only copy files
  echo "$(date) Syncing files at depth $DEPTH"
  find "$OCFL_DIR" -mindepth "$DEPTH" -maxdepth "$DEPTH" -type d \
    | parallel -v -j8 rsync -lptgoDzq --relative --rsync-path=srsync "{}/" "$DESTINATION"
done

# reap the two background drupal rsyncs; a failure there is logged but
# does not abort the script (set -e does not apply to `wait ... ||`)
for job_id in "${job_ids[@]}"; do
  wait "$job_id" || echo "One job failed, but continuing anyway" >&2
done

echo "$(date) Finished sync"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
TODO: add a basic inventory step at the end (e.g. a recursive file count per top-level directory)
and verify the production and staging counts match.