@joecorall
Last active May 10, 2024 10:39
rsync a production server to staging. This script is meant to run from the production server to make a copy to staging in the fastest way possible. Used on an Islandora OCFL fcrepo filesystem and a drupal filesystem totaling ~7TB
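For reference, a hypothetical way to run it from the production server (the script name and log path here are placeholders, not part of the gist; rsync and GNU parallel must be installed on the production host):

nohup ./sync-to-staging.sh > sync.log 2>&1 &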
#!/usr/bin/env bash
set -euo pipefail
# the source and target directory we want to rsync to another server
# this script assumes it has the same path on both servers
DIR=/opt/islandora/volumes
# where to send the files to
# replace USER and SERVER with your staging login and server domain
DESTINATION="USER@SERVER:${DIR}/"
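# NOTE: the --rsync-path=srsync flags below assume a wrapper named "srsync"
# exists on the receiving server (it is not included in this gist); a minimal
# sketch of such a wrapper would just elevate rsync, e.g.:
#   #!/usr/bin/env bash
#   exec sudo rsync "$@"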
# how many directories deep to scan
# set this to the depth that contains the most subdirectories,
# so we get a long list of directories we can rsync in parallel
# 5 means we'd have a list like fcrepo-data/home/data/ocfl-root/a6e/be5
MAX_DEPTH=5
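# to sanity-check the fan-out before a run, you can count how many parallel
# work units a given depth produces (a quick one-off check, not part of the
# sync itself): find fcrepo-data -mindepth 5 -maxdepth 5 -type d | wc -l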
cd "$DIR" || { echo "Failed to change directory to $DIR"; exit 1; }
echo "$(date) Starting sync"
job_ids=()
# these two drupal folders are not OCFL and are mostly derivatives
# so just a standard recursive rsync should be fast enough
rsync -azq --progress --relative --rsync-path=srsync "drupal-private-files" "$DESTINATION" &
job_ids+=($!)
rsync -azq --progress --relative --rsync-path=srsync "drupal-public-files" "$DESTINATION" &
job_ids+=($!)
OCFL_DIR="fcrepo-data"
# traverse the directory tree from the deepest level up
for DEPTH in $(seq "$MAX_DEPTH" -1 1); do
  # first, find and recursively sync all the directories at MAX_DEPTH,
  # so anything at a greater depth gets rsync'd in this pass
  if [ "$DEPTH" -eq "$MAX_DEPTH" ]; then
    echo "$(date) Syncing directories at depth >= $DEPTH"
    find "$OCFL_DIR" -mindepth "$DEPTH" -maxdepth "$DEPTH" -type d \
      | parallel -v -j8 rsync -azq --progress --relative --rsync-path=srsync "{}" "$DESTINATION"
    continue
  fi
  # now that the max depth has been synced, at this shallower depth
  # we only rsync the files that live directly in each directory,
  # expanding the -a flag without the -r option so we don't recurse
  echo "$(date) Syncing files at depth $DEPTH"
  find "$OCFL_DIR" -mindepth "$DEPTH" -maxdepth "$DEPTH" -type d \
    | parallel -v -j8 rsync -lptgoDzq --progress --relative --rsync-path=srsync "{}/" "$DESTINATION"
done
for job_id in "${job_ids[@]}"; do
  wait "$job_id" || echo "One job failed, but continuing anyway"
done
echo "$(date) Finished sync"
joecorall commented May 9, 2024

TODO: add some sort of basic inventory check at the end, like

COUNT=$(find fcrepo-data -type f | wc -l)
SIZE=$(du -d 1 /opt/islandora/volumes/fcrepo-data)

and compare that the prod and stage numbers match
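A rough sketch of that comparison, assuming passwordless ssh from prod to staging and the same layout on both hosts (USER@SERVER as in the script):

PROD_COUNT=$(find /opt/islandora/volumes/fcrepo-data -type f | wc -l)
STAGE_COUNT=$(ssh USER@SERVER 'find /opt/islandora/volumes/fcrepo-data -type f | wc -l')
if [ "$PROD_COUNT" -eq "$STAGE_COUNT" ]; then
  echo "file counts match ($PROD_COUNT)"
else
  echo "MISMATCH: prod=$PROD_COUNT stage=$STAGE_COUNT" >&2
fi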
