Last active
June 4, 2021 01:49
-
-
Save jpalpant/2c04ec5f89d7153e60bdac48b318fe44 to your computer and use it in GitHub Desktop.
Restore script for btrbk raw backups over SSH or from mountable CIFS target
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# btrbk-restore: restore a btrbk raw-type backup of a single subvolume, reading
# the archives over SSH (default) or from a mounted CIFS share.

# Abort on the first failing command and on a failure in any pipeline stage.
set -eo pipefail

# Text printed by -h/--help. The backslash after the opening quote swallows the
# first newline; \$USER prints the literal variable name, $USER its value.
HELPTEXT="\
Restore a btrbk raw-type backup of a single subvolume by reading the archives over SSH or over a CIFS mount. Requires btrfs-progs, btrbk, ssh, pv. gpg required if backups are encrypted; lz4 or other compression tool required if backups are compressed.
SSH-mode resembles how btrbk creates these archives, and should be possible for most btrbk raw target use-cases. This is the default. SSH host and port information is read from btrbk. SSH user and SSH key information is not - these will use SSH defaults, or can be provided by arguments.
CIFS-mode is just because my backups are stored on slow NAS that exposes a CIFS, and CIFS is faster than SSH.
Usage: btrbk-restore SOURCE_PATH
Options:
-i/--ssh-id KEYFILE - path to an SSH private key to use for SSH commands. Optional, defaults to using your SSH config.
--ssh-user USER - target user to use for SSH commands 'ssh USER@...'. Optional, uses SSH defaults if not needed.
--cifs CIFS_MOUNT_POINT - restore tarballs via mounting the CIFS target CIFS_MOUNT_POINT to a temporary directory.
--cifs-user USER - use the given username as the -o user=USER argument to the mount.cifs command. Default: \$USER ($USER)
-d/--dest PATH - path to an existing local subvolume where backups will be restored
--at DATE - target date for backup restore. Will restore backups necessary to get the backup which immediately precedes the date given. Default: now
"
#######################################
# Parse one row of `btrbk list backups` output into an associative array.
# Globals:
#   MODE            (read) when "cifs", the backup directory is replaced by
#                   the local mount directory
#   CIFS_MOUNT_DIR  (read) only consulted in cifs mode
# Arguments:
#   $1    - name of an associative array, declared by the caller, to fill
#   $2... - the row text (one quoted string or already word-split);
#           whitespace-separated columns 4, 5 and 6 are taken as host, port
#           and backup path
# Keys written:
#   host, port, backup_path, dir, backup_name, datestamp, date, date_s,
#   subvolume, snapshot, splits, info
# Returns:
#   non-zero if the row has no path/datestamp or `date -d` rejects the stamp
#######################################
parse_backup_row() {
  declare -n backup_array=$1
  # Pin IFS so joining and splitting do not depend on the caller's setting.
  local IFS=$' \t\n'
  local row="${*:2}"
  local -a fields=()

  read -r -a fields <<< "$row"
  backup_array[host]=${fields[3]:-}
  backup_array[port]=${fields[4]:-}
  backup_array[backup_path]=${fields[5]:-}

  # Without this guard an empty path would silently become "." below.
  if [[ -z "${backup_array[backup_path]}" ]]; then
    echo "Cannot parse backup row, no path in column 6: $row" >&2
    return 1
  fi

  backup_array[dir]=$(dirname -- "${backup_array[backup_path]}") || return
  backup_array[backup_name]=$(basename -- "${backup_array[backup_path]}") || return

  # File name layout is <snapshot>.<datestamp>.<rest>; split on the dots.
  backup_array[datestamp]=$(cut -d. -f2 <<< "${backup_array[backup_name]}") || return
  if [[ -z "${backup_array[datestamp]}" ]]; then
    # `date -d ""` would succeed and silently yield today's midnight.
    echo "Cannot find a datestamp in backup name ${backup_array[backup_name]}" >&2
    return 1
  fi
  backup_array[date]=$(date -d "${backup_array[datestamp]}") || return
  backup_array[date_s]=$(date -d "${backup_array[datestamp]}" +%s) || return
  backup_array[subvolume]=$(cut -d. -f1-2 <<< "${backup_array[backup_name]}") || return
  backup_array[snapshot]=$(cut -d. -f1 <<< "${backup_array[backup_name]}") || return

  # Stored as a glob pattern; assignments do not expand it.
  backup_array[splits]="${backup_array[backup_name]}.split_*"
  backup_array[info]="${backup_array[backup_name]}.info"

  # In cifs mode the files are read from the local mount, not the remote dir.
  # Write through the nameref so the caller's array is updated whatever it is
  # called.
  if [[ "${MODE:-}" == "cifs" ]]; then
    backup_array[dir]=${CIFS_MOUNT_DIR}
  fi
}
#######################################
# Read KEY=VALUE lines from stdin into an associative array.
# Lines with an empty key or an empty value (comments, blank lines, "KEY=")
# are skipped.
# Arguments:
#   $1 - name of an associative array, declared by the caller, to fill
# Inputs:
#   stdin - the text to parse; a final line without a newline is still read
#######################################
parse_backup_info() {
  declare -n info_array=$1
  local _info_key _info_value

  # `read` returns non-zero on an unterminated last line but still fills the
  # variables, hence the extra -n test.
  while IFS== read -r _info_key _info_value || [[ -n "$_info_key" ]]; do
    if [[ -n "$_info_key" && -n "$_info_value" ]]; then
      info_array["$_info_key"]=$_info_value
    fi
  done
}
#######################################
# Run a command locally; the cifs-mode counterpart of ssh_exec.
# Arguments:
#   $1    - name of the backup array; accepted so both exec helpers share a
#           call signature, not used here
#   $2... - command and arguments, executed exactly as given
# Returns:
#   exit status of the command
#######################################
cifs_exec() {
  shift
  # Quoted so arguments are not split or glob-expanded a second time.
  "$@"
}
#######################################
# Run a command on the host that stores a backup, via ssh.
# Globals:
#   SSH_ID    (read, optional) private key file passed as `-i`
#   SSH_USER  (read, optional) remote user; omitted so ssh uses its own
#             default when unset or empty
# Arguments:
#   $1    - name of an associative array with at least the keys "host" and
#           "port"
#   $2... - remote command and arguments; ssh joins them with spaces and the
#           remote shell interprets the result, so glob patterns are expanded
#           on the remote side
# Returns:
#   exit status of ssh
#######################################
ssh_exec() {
  declare -n _backup=$1
  shift

  # NOTE(review): "-o Compression=no" and "-C" look contradictory; both are
  # kept as they were - confirm which one is intended.
  local -a ssh_cmd=(ssh -T -o Compression=no)
  if [[ -v SSH_ID ]]; then
    ssh_cmd+=(-i "$SSH_ID")
  fi
  # Only pass -p for a numeric port. Presumably btrbk prints a placeholder
  # when no port is configured - verify against `btrbk list` output.
  if [[ "${_backup[port]:-}" =~ ^[0-9]+$ ]]; then
    ssh_cmd+=(-p "${_backup[port]}")
  fi
  ssh_cmd+=(-C)

  local ssh_target=${_backup[host]}
  if [[ -n "${SSH_USER:-}" ]]; then
    ssh_target="${SSH_USER}@${ssh_target}"
  fi

  "${ssh_cmd[@]}" "$ssh_target" "$@"
}
## Argument parsing
POSITIONAL=()

# Exit with an error unless the option in $1 is followed by a value.
# $2 is the number of arguments left, including the option itself.
_need_value() {
  if (( $2 < 2 )); then
    echo "Option $1 requires a value." >&2
    exit 255
  fi
}

#######################################
# Parse the command line.
# Globals written:
#   MODE              "ssh" or "cifs", set only when a mode-specific option
#                     is given
#   SSH_ID, SSH_USER, CIFS_MOUNT_POINT, CIFS_USER, TARGET_SUBVOLUME
#   RESTORE_DATE      the --at value as re-printed by `date -d`
#   POSITIONAL        every argument that is not a known option
# Globals read:
#   HELPTEXT          printed for -h/--help
# Arguments:
#   the script's arguments
# Exits:
#   0 after --help; 255 (the status `exit -1` produced) on invalid input
#######################################
parse_args() {
  local opt
  while (( $# > 0 )); do
    opt=$1
    case "$opt" in
      -h|--help)
        echo "$HELPTEXT"
        exit 0
        ;;
      -i|--ssh-id)
        _need_value "$opt" "$#"
        if [[ -v MODE && "$MODE" != "ssh" ]]; then
          echo "Cannot provide --ssh-id when using mode $MODE." >&2
          exit 255
        fi
        MODE="ssh"
        SSH_ID=$2
        shift 2
        ;;
      --ssh-user)
        _need_value "$opt" "$#"
        if [[ -v MODE && "$MODE" != "ssh" ]]; then
          echo "Cannot provide --ssh-user when using mode $MODE." >&2
          exit 255
        fi
        MODE="ssh"
        SSH_USER=$2
        shift 2
        ;;
      --cifs)
        _need_value "$opt" "$#"
        if [[ -v MODE && "$MODE" != "cifs" ]]; then
          echo "Cannot provide --cifs when in mode $MODE" >&2
          exit 255
        fi
        MODE="cifs"
        CIFS_MOUNT_POINT=$2
        shift 2
        ;;
      --cifs-user)
        _need_value "$opt" "$#"
        if [[ -v MODE && "$MODE" != "cifs" ]]; then
          echo "Cannot provide --cifs-user when using mode $MODE." >&2
          exit 255
        fi
        MODE="cifs"
        CIFS_USER=$2
        shift 2
        ;;
      -d|--dest)
        _need_value "$opt" "$#"
        TARGET_SUBVOLUME=$2
        shift 2
        ;;
      --at)
        _need_value "$opt" "$#"
        # Quoted: a date such as "2021-06-04 01:00" must stay one argument.
        if ! RESTORE_DATE=$(date -d "$2"); then
          echo "Date $2 cannot be validated by date -d, please enter a valid datestamp" >&2
          exit 255
        fi
        shift 2
        ;;
      *)
        POSITIONAL+=("$1") # save it in an array for later
        shift # past argument
        ;;
    esac
  done
}

parse_args "$@"
## Clarify and validate arguments
# No mode-specific option was given: use SSH.
if [[ ! -v MODE ]]; then
  MODE="ssh"
fi

# No --at was given: restore up to the current time.
if [[ ! -v RESTORE_DATE ]]; then
  RESTORE_DATE="$(date -d today)"
fi
# Epoch seconds, used for numeric comparison against backup dates.
RESTORE_DATE_S=$(date -d "$RESTORE_DATE" +%s)

set -u # no unbound variables after this

if [[ ! -v TARGET_SUBVOLUME ]]; then
  read -r -e -p "Please enter local subvolume to target for restoring backups: " TARGET_SUBVOLUME
fi

# Drop one trailing slash so paths can be joined with "/", but keep "/"
# itself, which would otherwise collapse to an empty string.
if [[ "$TARGET_SUBVOLUME" != "/" ]]; then
  TARGET_SUBVOLUME=${TARGET_SUBVOLUME%/}
fi

if [[ -z "$TARGET_SUBVOLUME" ]] || ! btrfs subvolume show "$TARGET_SUBVOLUME" > /dev/null; then
  echo "Target $TARGET_SUBVOLUME is not a btrfs subvolume." >&2
  exit 255
fi
# Pick the btrbk source row to restore: interactively from a menu when no
# positional argument was given, otherwise the first row of
# `btrbk list source` that contains the argument as a literal substring.
# Sets source_row, source_subvolume (column 1) and snapshot_name (column 3).
if [[ ! -v 'POSITIONAL[0]' ]]; then
  # Element 0 is the header row, elements 1..n are the selectable sources.
  sources=()
  while IFS= read -r source_line; do
    if [[ -n "$source_line" ]]; then
      sources+=("$source_line")
    fi
  done < <(btrbk list source)

  if (( ${#sources[@]} < 2 )); then
    echo "btrbk list source returned no backup sources" >&2
    exit 255
  fi

  echo "${sources[0]}"
  for idx in "${!sources[@]}"; do
    if [[ $idx != 0 ]]; then
      echo "$idx) ${sources[$idx]}"
    fi
  done

  read -r -p "Choose subvolume to restore: " source_idx
  if ! [[ "$source_idx" =~ ^[0-9]+$ && $source_idx -gt 0 && $source_idx -lt ${#sources[@]} ]]; then
    echo "Invalid selection $source_idx, please enter an integer from 1-$(( ${#sources[@]} - 1 ))" >&2
    exit 255
  fi
  source_row=${sources[$source_idx]}
else
  source_name=${POSITIONAL[0]}
  source_list=$(btrbk list source)
  # grep exits 1 when nothing matches; tolerate that here so the explicit
  # error message below is reached instead of a silent errexit.
  source_row=$(grep -F -- "$source_name" <<< "$source_list" || true)
  # Several rows may match; use the first one.
  source_row=${source_row%%$'\n'*}
fi

if [[ -z "$source_row" ]]; then
  echo "Could not find valid backup source definition to restore" >&2
  exit 255
fi

read -r source_subvolume _ snapshot_name _ <<< "$source_row"
echo "Restoring backups for $snapshot_name as close to $RESTORE_DATE as possible (not later)"
## Prep according to the given mode
# exec_for_backup names the helper used to run commands where the backup
# files live: over ssh, or locally on a temporary CIFS mount.
if [[ "$MODE" == "ssh" ]]; then
  exec_for_backup=ssh_exec
elif [[ "$MODE" == "cifs" ]]; then
  # --cifs-user alone also selects cifs mode, so the share may be missing.
  if [[ -z "${CIFS_MOUNT_POINT:-}" ]]; then
    echo "CIFS mode requires --cifs CIFS_MOUNT_POINT" >&2
    exit 255
  fi

  CIFS_MOUNT_DIR=$(mktemp -d)
  cifs_is_mounted=0

  # Unmount (only if the mount succeeded) and remove the temporary directory.
  cleanup_cifs_mount() {
    if (( cifs_is_mounted )); then
      umount "$CIFS_MOUNT_DIR" || echo "Warning: could not unmount $CIFS_MOUNT_DIR" >&2
    fi
    rmdir "$CIFS_MOUNT_DIR"
  }
  # Installed before mounting so a failed mount does not leak the directory.
  trap cleanup_cifs_mount EXIT
  # Ctrl-C must end the script; cleanup then runs once through the EXIT trap.
  trap 'exit 130' INT

  # Built as an array and run directly, so values containing spaces or shell
  # metacharacters are passed through verbatim instead of being eval'ed.
  mount_command=(
    mount -v -t cifs -o vers=3.0 "$CIFS_MOUNT_POINT" "$CIFS_MOUNT_DIR"
    -o "user=${CIFS_USER:-${USER:-$(id -un)}}"
  )
  echo "Mounting: ${mount_command[*]}"
  "${mount_command[@]}"
  cifs_is_mounted=1

  exec_for_backup=cifs_exec
fi
## Fetch candidate backups
# Keep the rows of `btrbk list backups` that mention the source subvolume.
# Matching is done in the shell so "no match" yields an empty list rather
# than a failing grep that would end the script without a message.
backup_listing=$(btrbk list backups)
initial_backup_list=()
while IFS= read -r backup_row; do
  if [[ -n "$backup_row" && "$backup_row" == *"$source_subvolume"* ]]; then
    initial_backup_list+=("$backup_row")
  fi
done <<< "$backup_listing"

## Filter backups to match the correct date requested. Select backups from the last non-incremental backup before the target date up through the target date.
echo "Filtering unnecessary backups"
declare -a target_backups_reverse_ordered=()
declare -a target_backups=()
found_full_backup=0

# Walk the list from its last row to its first (assumes btrbk lists backups
# oldest first - TODO confirm), collecting rows until a backup without a
# parent is reached.
for (( idx=${#initial_backup_list[@]}-1 ; idx>=0 ; idx-- )); do
  backup_row="${initial_backup_list[$idx]}"

  # Start each iteration with empty arrays so nothing from the previous
  # backup survives.
  unset backup backup_details
  declare -A backup=()
  declare -A backup_details=()

  parse_backup_row backup "$backup_row"
  if [[ ${backup[date_s]} -gt $RESTORE_DATE_S ]]; then
    echo "DISCARD backup ${backup[subvolume]} because its date (${backup[date]}) is too recent"
    continue
  fi
  echo "KEEP backup ${backup[subvolume]} (${backup[date]})"
  target_backups_reverse_ordered+=("$backup_row")

  # Fetch the .info file into a variable first: a failure inside a process
  # substitution would go unnoticed and the backup would wrongly be treated
  # as non-incremental.
  if ! info_text=$("$exec_for_backup" backup cat "${backup[dir]}/${backup[info]}"); then
    echo "Could not read ${backup[dir]}/${backup[info]}" >&2
    exit 255
  fi
  # Parsed in the current shell (not in a pipeline) so the array is updated.
  parse_backup_info backup_details <<< "$info_text"

  if [[ ! -v 'backup_details[RECEIVED_PARENT_UUID]' ]]; then
    echo "Backup ${backup[subvolume]} is the most recent non-incremental backup before $RESTORE_DATE"
    found_full_backup=1
    break
  fi
done
unset backup backup_details

if (( ${#target_backups_reverse_ordered[@]} == 0 )); then
  echo "No valid backups for $RESTORE_DATE!" >&2
  exit 255
fi
if (( ! found_full_backup )); then
  echo "Warning: no non-incremental backup found before $RESTORE_DATE; the restore needs the parent snapshot to already exist in $TARGET_SUBVOLUME" >&2
fi

# Flip the collected rows into restore order.
for (( idx=${#target_backups_reverse_ordered[@]}-1 ; idx>=0 ; idx-- )); do
  target_backups+=("${target_backups_reverse_ordered[$idx]}")
done
echo "Will restore ${#target_backups[@]} backups sequentially into $TARGET_SUBVOLUME"
## Remove conflicting subvolumes
# Each backup is received as $TARGET_SUBVOLUME/<subvolume>; that path must not
# exist beforehand. The user may remove it by hand or let the script delete it.
echo "Checking for conflicting subvolumes in $TARGET_SUBVOLUME"
for idx in "${!target_backups[@]}"; do
  backup_row="${target_backups[$idx]}"
  unset backup
  declare -A backup=()
  parse_backup_row backup "$backup_row"

  # An empty name would make the path below point at the target subvolume
  # itself, which must never be offered for deletion.
  if [[ -z "${backup[subvolume]}" ]]; then
    echo "Could not determine the subvolume name for ${backup[backup_path]}" >&2
    exit 255
  fi
  restore_path="${TARGET_SUBVOLUME}/${backup[subvolume]}"

  if [[ -d "$restore_path" ]]; then
    echo "Restore location ($restore_path) for ${backup[subvolume]} already exists and must be removed!"
    read -r -p "Press Ctrl-C to cancel, Enter if you have removed the directory, or R to remove automatically: " do_remove
    if [[ "$do_remove" == "R" ]]; then
      echo "Removing $restore_path with btrfs subvolume delete..."
      # Trace the destructive command so the exact path is visible.
      set -x
      btrfs subvolume delete "$restore_path"
      set +x
    fi
  fi

  # Re-check: covers both a manual removal that did not happen and "Enter"
  # pressed without removing anything.
  if [[ -d "$restore_path" ]]; then
    echo "Conflicting files not removed. btrfs requires $restore_path to be empty in order to restore the subvolume there." >&2
    exit 255
  fi
  echo "Backup $(( idx + 1 )): ${backup[backup_path]} -> $restore_path OK"
done
# Restore the backups
# Each backup's split files are concatenated in order, decrypted with gpg,
# decompressed with lz4 and fed to `btrfs receive`.
# NOTE(review): the pipeline always runs gpg and lz4, so it only fits backups
# that are both gpg-encrypted and lz4-compressed - confirm this is intended.

# GnuPG home directory holding the decryption key. Override it with the
# BTRBK_RESTORE_GNUPGHOME environment variable; the default is the value that
# used to be hard-coded.
gpg_homedir=${BTRBK_RESTORE_GNUPGHOME:-/home/justin/.gnupg}

for idx in "${!target_backups[@]}"; do
  backup_row="${target_backups[$idx]}"
  unset backup
  declare -A backup=()
  parse_backup_row backup "$backup_row"

  # Combined size of all split files: in MiB for pv's progress bar, and in
  # human-readable form for the log line.
  total_size_m=$("$exec_for_backup" backup find "${backup[dir]}" -type f -name "${backup[splits]}" -exec du -cm {} + | grep 'total$' | cut -f1)
  total_size_h=$("$exec_for_backup" backup find "${backup[dir]}" -type f -name "${backup[splits]}" -exec du -ch {} + | grep 'total$' | cut -f1)
  echo "Restore step $((idx + 1))/${#target_backups[@]} - ${backup[backup_path]} ($total_size_h) -> ${TARGET_SUBVOLUME}/${backup[subvolume]}"

  if [[ "$MODE" == "cifs" ]]; then
    # Files are on the local mount: expand the split_* glob here. The
    # pattern part is deliberately unquoted.
    split_files=( "${backup[dir]}"/${backup[splits]} )
  else
    # Files are remote: hand the pattern over for the remote shell to expand.
    split_files=( "${backup[dir]}/${backup[splits]}" )
  fi

  "$exec_for_backup" backup cat "${split_files[@]}" \
    | pv --size "${total_size_m}M" --buffer-size 2g \
    | gpg --quiet --homedir "$gpg_homedir" --decrypt \
    | lz4 -1 -d -c \
    | btrfs receive "$TARGET_SUBVOLUME"

  unset backup
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment