Skip to content

Instantly share code, notes, and snippets.

@jpalpant
Last active June 4, 2021 01:49
Show Gist options
  • Save jpalpant/2c04ec5f89d7153e60bdac48b318fe44 to your computer and use it in GitHub Desktop.
Save jpalpant/2c04ec5f89d7153e60bdac48b318fe44 to your computer and use it in GitHub Desktop.
Restore script for btrbk raw backups, read over SSH or from a mountable CIFS target
#!/usr/bin/env bash
#
# btrbk-restore: restore a chain of btrbk raw-type backups of one subvolume
# into a local btrfs subvolume, reading the archives over SSH or from a
# temporarily mounted CIFS share.
#
# -e: abort on the first failing command; pipefail: a pipeline fails when any
# stage fails. `set -u` is enabled later, once option parsing has finished,
# because the parser relies on testing variables that may be unset.
set -eo pipefail

# Text printed by -h/--help. The string is expanded once, here, so "($USER)"
# shows the invoking user's name while "\$USER" stays literal.
HELPTEXT="\
Restore a btrbk raw-type backup of a single subvolume by reading the archives over SSH or over a CIFS mount. Requires btrfs-progs, btrbk, ssh, pv. gpg required if backups are encrypted; lz4 or other compression tool required if backups are compressed.
SSH-mode resembles how btrbk creates these archives, and should be possible for most btrbk raw target use-cases. This is the default. SSH host and port information is read from btrbk. SSH user and SSH key information is not - these will use SSH defaults, or can be provided by arguments.
CIFS-mode is just because my backups are stored on a slow NAS that exposes a CIFS share, and CIFS is faster than SSH.
Usage: btrbk-restore [OPTIONS] [SOURCE_PATH]
If SOURCE_PATH is omitted, the sources reported by 'btrbk list source' are listed and you are asked to choose one.
Options:
-h/--help - print this help text and exit.
-i/--ssh-id KEYFILE - path to an SSH private key to use for SSH commands. Optional, defaults to using your SSH config.
--ssh-user USER - target user to use for SSH commands 'ssh USER@...'. Optional, uses SSH defaults if not needed.
--cifs CIFS_MOUNT_POINT - restore tarballs via mounting the CIFS target CIFS_MOUNT_POINT to a temporary directory.
--cifs-user USER - use the given username as the -o user=USER argument to the mount.cifs command. Default: \$USER ($USER)
-d/--dest PATH - path to an existing local subvolume where backups will be restored. If omitted, you are prompted for it.
--at DATE - target date for backup restore. Will restore backups necessary to get the backup which immediately precedes the date given. Default: now
"
# Parse one row of `btrbk list backups` output into an associative array.
#
# Arguments:
#   $1   - name of an associative array, declared by the caller, to fill in
#   $2.. - the row, either as one string or already split into words
# Globals:
#   MODE, CIFS_MOUNT_DIR (read): in cifs mode "dir" is replaced by the mount dir
# Keys written: host, port, backup_path, dir, backup_name, datestamp, date,
#   date_s, subvolume, snapshot, splits, info
# Returns: non-zero if the row carries no usable datestamp or `date` rejects it.
function parse_backup_row {
  local -n backup_array=$1
  shift
  # Join and re-split on whitespace so both calling conventions give the same
  # columns. Locals carry a prefix so they cannot shadow the caller's array.
  local IFS=$' \t\n'
  local _pbr_row="$*"
  local -a _pbr_fields=()
  read -r -a _pbr_fields <<<"$_pbr_row"

  # Columns 4, 5 and 6 of the row are host, port and backup path.
  backup_array["host"]=${_pbr_fields[3]:-}
  backup_array["port"]=${_pbr_fields[4]:-}
  local _pbr_path=${_pbr_fields[5]:-}
  backup_array["backup_path"]=$_pbr_path
  backup_array["dir"]=$(dirname -- "$_pbr_path")

  local _pbr_name
  _pbr_name=$(basename -- "$_pbr_path")
  backup_array["backup_name"]=$_pbr_name

  # The file name is dot-separated: field 1 is the snapshot name and field 2
  # the datestamp; fields 1-2 together name the restored subvolume.
  local _pbr_stamp
  _pbr_stamp=$(cut -d. -f2 <<<"$_pbr_name")
  if [[ -z "$_pbr_stamp" ]]; then
    echo "Cannot extract a datestamp from backup row: $_pbr_row" >&2
    return 1
  fi
  backup_array["datestamp"]=$_pbr_stamp
  backup_array["date"]=$(date -d "$_pbr_stamp")
  backup_array["date_s"]=$(date -d "$_pbr_stamp" +%s)
  backup_array["subvolume"]=$(cut -d. -f1-2 <<<"$_pbr_name")
  backup_array["snapshot"]=$(cut -d. -f1 <<<"$_pbr_name")
  # "splits" is stored as a glob pattern; it is expanded later by whoever
  # lists or reads the split files.
  backup_array["splits"]="${_pbr_name}.split_*"
  backup_array["info"]="${_pbr_name}.info"

  if [[ "${MODE:-}" == "cifs" ]]; then
    # Write through the nameref, not a global called "backup", so the
    # override works whatever the caller's array is named.
    backup_array["dir"]=${CIFS_MOUNT_DIR}
  fi
}
# Read KEY=VALUE lines from stdin into an associative array.
#
# Arguments:
#   $1 - name of an associative array, declared by the caller, to fill in
# Input: lines on stdin; each is split at the first "=", so values may
#   themselves contain "=". Lines with an empty key or an empty value
#   (including lines that have no "=" at all) are skipped.
# Note: call this with a redirection, not at the end of a pipeline, otherwise
#   the array is filled in a subshell and the caller never sees the entries.
function parse_backup_info {
  local -n info_array=$1
  local _pbi_key _pbi_value
  # `read` returns non-zero on a final line that lacks a trailing newline but
  # still fills the variables; the extra test keeps that last line.
  while IFS== read -r _pbi_key _pbi_value || [[ -n "$_pbi_key" ]]; do
    if [[ -n "$_pbi_key" && -n "$_pbi_value" ]]; then
      info_array["$_pbi_key"]=$_pbi_value
    fi
  done
}
# Run a command locally (CIFS mode: the backup files are on a local mount).
#
# Arguments:
#   $1   - name of the backup array; accepted only so the call signature
#          matches ssh_exec, and otherwise ignored
#   $2.. - the command and its arguments
# Returns: the exit status of the command.
function cifs_exec() {
  shift
  # Quoted so each argument reaches the command exactly as the caller passed
  # it, with no second round of word splitting or glob expansion.
  "$@"
}
# Run a command on the host that stores a backup, over SSH.
#
# Arguments:
#   $1   - name of an associative array with "host" and "port" keys
#   $2.. - the command and its arguments; ssh hands them to the remote shell
# Globals:
#   SSH_ID   (read, optional) - private key file passed with -i
#   SSH_USER (read, optional) - remote user; ssh's own default when unset
# Returns: the exit status of ssh.
function ssh_exec() {
  # Use the array named by the caller rather than a global called "backup".
  local -n _backup=$1
  shift
  # Build the command as an array so paths containing spaces survive intact.
  # NOTE(review): "-o Compression=no" and "-C" (added below) look
  # contradictory; both are kept from the original - confirm which is intended.
  local -a _ssh_cmd=(ssh -T -o Compression=no)
  if [[ -n "${SSH_ID:-}" ]]; then
    _ssh_cmd+=(-i "$SSH_ID")
  fi
  if [[ -n "${_backup[port]:-}" ]]; then
    _ssh_cmd+=(-p "${_backup[port]}")
  fi
  _ssh_cmd+=(-C)
  # SSH_USER may legitimately be unset (the script later runs under `set -u`);
  # only prefix "user@" when one was supplied.
  local _ssh_dest=${_backup[host]}
  if [[ -n "${SSH_USER:-}" ]]; then
    _ssh_dest="${SSH_USER}@${_ssh_dest}"
  fi
  _ssh_cmd+=("$_ssh_dest")
  "${_ssh_cmd[@]}" "$@"
}
## Argument parsing
# Non-option words are collected here; element 0, when present, is the source
# to restore.
POSITIONAL=()

# Abort with a message when option $1 has no value after it.
# $2 is the number of arguments left, counting the option itself.
_require_option_value() {
  if (( $2 < 2 )); then
    echo "Option $1 requires a value." >&2
    exit 1
  fi
}

# Switch to mode $1 on behalf of option $2. SSH and CIFS options are mutually
# exclusive, so an option belonging to the other mode is an error.
_select_mode() {
  if [[ -v MODE && "$MODE" != "$1" ]]; then
    echo "Cannot provide $2 when using mode $MODE." >&2
    exit 1
  fi
  MODE=$1
}

# Parse the command line.
# Globals written: MODE, SSH_ID, SSH_USER, CIFS_MOUNT_POINT, CIFS_USER,
#   TARGET_SUBVOLUME, RESTORE_DATE, POSITIONAL. Variables whose option was not
#   given are left unset; later code tests for that with [[ -v ... ]].
# Exits 0 after printing help, 1 on invalid usage.
parse_args() {
  local key
  while (( $# > 0 )); do
    key=$1
    case "$key" in
      -h|--help)
        echo "$HELPTEXT"
        exit 0
        ;;
      -i|--ssh-id)
        _require_option_value "$key" "$#"
        _select_mode ssh "$key"
        SSH_ID=$2
        shift 2
        ;;
      --ssh-user)
        _require_option_value "$key" "$#"
        _select_mode ssh "$key"
        SSH_USER=$2
        shift 2
        ;;
      --cifs)
        _require_option_value "$key" "$#"
        _select_mode cifs "$key"
        CIFS_MOUNT_POINT=$2
        shift 2
        ;;
      --cifs-user)
        _require_option_value "$key" "$#"
        _select_mode cifs "$key"
        CIFS_USER=$2
        shift 2
        ;;
      -d|--dest)
        _require_option_value "$key" "$#"
        TARGET_SUBVOLUME=$2
        shift 2
        ;;
      --at)
        _require_option_value "$key" "$#"
        # Quote the value: dates such as "2021-01-01 12:00" contain spaces.
        if ! RESTORE_DATE=$(date -d "$2"); then
          echo "Date $2 cannot be validated by date -d, please enter a valid datestamp" >&2
          exit 1
        fi
        shift 2
        ;;
      *)
        POSITIONAL+=("$1") # save it in an array for later
        shift              # past argument
        ;;
    esac
  done
}

parse_args "$@"
## Clarify and validate arguments
# Options that were never given are still unset at this point; fill in their
# defaults: SSH mode, and "now" as the restore date.
[[ -v MODE ]] || MODE="ssh"
[[ -v RESTORE_DATE ]] || RESTORE_DATE="$(date -d today)"
# Epoch seconds of the restore date, for numeric comparison with backup dates.
RESTORE_DATE_S=$(date -d "$RESTORE_DATE" +%s)
set -u # no unbound variables after this
# Determine the local subvolume that will receive the restored backups,
# prompting for it when -d/--dest was not given.
if [[ ! -v TARGET_SUBVOLUME ]]; then
  read -r -e -p "Please enter local subvolume to target for restoring backups: " TARGET_SUBVOLUME
fi
# Drop one trailing slash so paths can be built as "$TARGET_SUBVOLUME/name",
# but leave "/" itself alone: stripping it would produce an empty path.
if [[ "$TARGET_SUBVOLUME" != "/" ]]; then
  TARGET_SUBVOLUME=${TARGET_SUBVOLUME%/}
fi
if ! btrfs subvolume show "$TARGET_SUBVOLUME" > /dev/null; then
  echo "Target $TARGET_SUBVOLUME is not a btrfs subvolume." >&2
  exit 1
fi
# Pick the row of `btrbk list source` describing the subvolume to restore:
# interactively when no SOURCE_PATH was given, otherwise the first row that
# contains the given text.
source_row=""
if [[ ! -v 'POSITIONAL[0]' ]]; then
  # Read one array element per non-empty output line.
  sources=()
  while IFS= read -r source_line; do
    if [[ -n "$source_line" ]]; then
      sources+=("$source_line")
    fi
  done < <(btrbk list source)
  if (( ${#sources[@]} == 0 )); then
    echo "btrbk list source returned no output" >&2
    exit 1
  fi
  # Element 0 is printed without a number and cannot be selected: it is
  # treated as the header row of the listing.
  echo "${sources[0]}"
  for idx in "${!sources[@]}"; do
    if [[ $idx != 0 ]]; then
      echo "$idx) ${sources[$idx]}"
    fi
  done
  read -r -p "Choose subvolume to restore: " source_idx
  if ! [[ "$source_idx" =~ ^[0-9]+$ && $source_idx -gt 0 && $source_idx -lt ${#sources[@]} ]]; then
    echo "Invalid selection $source_idx, please enter an integer from 1-$(( ${#sources[@]} - 1 ))" >&2
    exit 1
  fi
  source_row=${sources[$source_idx]}
else
  source_name=${POSITIONAL[0]}
  # Capture the listing first so a btrbk failure still aborts the script.
  # grep exits 1 when nothing matches; tolerate that here so the explicit
  # error below is printed instead of `set -e` ending the script silently.
  source_listing=$(btrbk list source)
  source_row=$(grep -m 1 -F -- "$source_name" <<<"$source_listing" || true)
fi
if [[ -z "$source_row" ]]; then
  echo "Could not find valid backup source definition to restore" >&2
  exit 1
fi
# Column 1 of the row is the source subvolume, column 3 the snapshot name.
read -r source_subvolume _ snapshot_name _ <<<"$source_row"
echo "Restoring backups for $snapshot_name as close to $RESTORE_DATE as possible (not later)"
## Prep according to the given mode
# Choose the function used to run commands where the backup files live. In
# CIFS mode the share is first mounted on a temporary directory.
if [[ "$MODE" == "ssh" ]]; then
  exec_for_backup=ssh_exec
elif [[ "$MODE" == "cifs" ]]; then
  # --cifs-user alone also selects cifs mode, so the share may be missing.
  if [[ -z "${CIFS_MOUNT_POINT:-}" ]]; then
    echo "CIFS mode requires --cifs CIFS_MOUNT_POINT." >&2
    exit 1
  fi
  CIFS_MOUNT_DIR=$(mktemp -d)
  cifs_is_mounted=0
  # Unmount (only if the mount succeeded) and remove the temporary directory.
  cleanup_cifs_mount() {
    if [[ "$cifs_is_mounted" == 1 ]]; then
      umount "$CIFS_MOUNT_DIR" || true
    fi
    rmdir "$CIFS_MOUNT_DIR" || true
  }
  # Install the cleanup before mounting so a failed mount does not leave the
  # temporary directory behind. On Ctrl-C, exit (which runs the EXIT trap)
  # rather than cleaning up and then carrying on with the restore.
  trap cleanup_cifs_mount EXIT
  trap 'exit 130' INT
  # Built as an array and run directly: no eval, no re-splitting of values.
  mount_command=(
    mount -v -t cifs -o vers=3.0
    "$CIFS_MOUNT_POINT" "$CIFS_MOUNT_DIR"
    -o "user=${CIFS_USER:-${USER:-$(id -un)}}"
  )
  echo "Mounting: ${mount_command[*]}"
  "${mount_command[@]}"
  cifs_is_mounted=1
  exec_for_backup=cifs_exec
fi
## Fetch candidate backups
# Capture the listing first so a btrbk failure still aborts under `set -e`.
# The filter runs in a process substitution, so "no rows matched" leaves an
# empty list that is reported below instead of ending the script silently.
backup_listing=$(btrbk list backups)
mapfile -t initial_backup_list < <(grep -F -- "$source_subvolume" <<<"$backup_listing")

## Filter backups to match the correct date requested. Select backups from the last non-incremental backup before the target date up through the target date.
echo "Filtering unnecessary backups"
declare -a target_backups_reverse_ordered=()
declare -a target_backups=()
found_full_backup=0
# Walk the list from its last row to its first, collecting rows until a
# backup without a parent (a full backup) is reached.
for (( idx=${#initial_backup_list[@]}-1 ; idx>=0 ; idx-- )); do
  backup_row="${initial_backup_list[$idx]}"
  # Start every iteration from empty arrays so nothing carries over from the
  # previous row, including after a `continue`.
  unset backup backup_details
  declare -A backup=()
  declare -A backup_details=()
  parse_backup_row backup "$backup_row"
  if [[ ${backup["date_s"]} -gt $RESTORE_DATE_S ]]; then
    echo "DISCARD backup ${backup["subvolume"]} because its date (${backup["date"]}) is too recent"
    continue
  else
    echo "KEEP backup ${backup["subvolume"]} (${backup["date"]})"
  fi
  target_backups_reverse_ordered+=("$backup_row")
  # can't do "exec ... | parse_backup_info backup_details" because each command in a pipeline executes in a subshell
  # subshell gets a copy of the array, but can't update it in the parent shell
  parse_backup_info backup_details < <("$exec_for_backup" backup cat "${backup["dir"]}/${backup["info"]}")
  # A failure inside the process substitution goes unnoticed, and an empty
  # result would look exactly like a full backup. Treat it as an error.
  if (( ${#backup_details[@]} == 0 )); then
    echo "Could not read backup info ${backup["dir"]}/${backup["info"]}" >&2
    exit 1
  fi
  if [[ ! -v "backup_details[RECEIVED_PARENT_UUID]" ]]; then
    echo "Backup ${backup["subvolume"]} is the most recent non-incremental backup before $RESTORE_DATE"
    found_full_backup=1
    break
  fi
done
if (( ${#target_backups_reverse_ordered[@]} == 0 )); then
  echo "No valid backups for $RESTORE_DATE!" >&2
  exit 1
fi
if (( found_full_backup == 0 )); then
  echo "Warning: no non-incremental backup found before $RESTORE_DATE; the restore chain may be incomplete." >&2
fi
# Reverse the collected rows so they are restored in the listing's order.
for (( idx=${#target_backups_reverse_ordered[@]}-1 ; idx>=0 ; idx-- )); do
  backup_row="${target_backups_reverse_ordered[$idx]}"
  target_backups+=("$backup_row")
done
echo "Will restore ${#target_backups[@]} backups sequentially into $TARGET_SUBVOLUME"
## Remove conflicting subvolumes
# Each backup is received as $TARGET_SUBVOLUME/<subvolume>; that path must not
# exist beforehand. Offer to delete it, and stop if it is still there.
echo "Checking for conflicting subvolumes in $TARGET_SUBVOLUME"
for idx in "${!target_backups[@]}"; do
  backup_row="${target_backups[$idx]}"
  unset backup
  declare -A backup=()
  parse_backup_row backup "$backup_row"
  restore_path="${TARGET_SUBVOLUME}/${backup["subvolume"]}"
  if [[ -d "$restore_path" ]]; then
    echo "Restore location ($restore_path) for ${backup["subvolume"]} already exists and must be removed!"
    read -r -p "Press Ctrl-C to cancel, Enter if you have removed the directory, or R to remove automatically: " do_remove
    if [[ "$do_remove" == "R" ]]; then
      echo "Removing $restore_path with btrfs subvolume delete..."
      # Trace the destructive command so the user sees exactly what ran. The
      # path is quoted so a space in it cannot make btrfs delete another path.
      set -x
      btrfs subvolume delete "$restore_path"
      set +x
    fi
  fi
  # Check again: the user may have pressed Enter without removing anything.
  if [[ -d "$restore_path" ]]; then
    echo "Conflicting files not removed. btrfs requires $restore_path to be empty in order to restore the subvolume there." >&2
    exit 1
  fi
  echo "Backup $(( idx + 1 )): ${backup["backup_path"]} -> $restore_path OK"
done
# Restore the backups
# For each selected backup: concatenate its split files, decrypt with gpg,
# decompress with lz4 and feed the stream to `btrfs receive`.
#
# The GnuPG home directory used to be fixed to one user's home; GNUPGHOME now
# takes precedence and the old path remains the fallback.
# TODO(review): the fallback is machine-specific - confirm a better default.
gpg_homedir=${GNUPGHOME:-/home/justin/.gnupg}
for idx in "${!target_backups[@]}"; do
  backup_row="${target_backups[$idx]}"
  unset backup
  declare -A backup=()
  parse_backup_row backup "$backup_row"
  # Total size of the split files, in MiB for pv and human-readable for the
  # progress message. The -name pattern is quoted so the local shell passes
  # it on unexpanded.
  total_size_m=$("$exec_for_backup" backup find "${backup["dir"]}" -type f -name "${backup["splits"]}" -exec du -cm {} + | grep 'total$' | cut -f1)
  total_size_h=$("$exec_for_backup" backup find "${backup["dir"]}" -type f -name "${backup["splits"]}" -exec du -ch {} + | grep 'total$' | cut -f1)
  echo "Restore step $((idx + 1))/${#target_backups[@]} - ${backup["backup_path"]} ($total_size_h) -> ${TARGET_SUBVOLUME}/${backup["subvolume"]}"
  # The argument to cat is left unquoted on purpose: it is a glob that must
  # expand to the split files, either here (CIFS mount) or, when nothing
  # matches locally, in the remote shell (SSH).
  # shellcheck disable=SC2086
  "$exec_for_backup" backup cat ${backup["dir"]}/${backup["splits"]} \
    | pv --size "${total_size_m}M" --buffer-size 2g \
    | gpg --quiet --homedir "$gpg_homedir" --decrypt \
    | lz4 -1 -d -c \
    | btrfs receive "$TARGET_SUBVOLUME"
  unset backup
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment