Last active
June 22, 2023 15:03
-
-
Save caelor/03d95d4adcbb5cb17705d8d94e5e86dd to your computer and use it in GitHub Desktop.
Simple, opinionated BTRFS snapshot "time machine".
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Opinionated "timemachine" backups using btrfs snapshots
#
# Andy Boff, 2023-06-18. GPLv3
# No warranty - do your own testing to be confident it works for you
#
#
# Intended for use on a quiet home NAS where the drives are
# hopefully spun down most of the time. If the disks have not
# been seen to spin up between scheduled snapshots, then the
# subsequent snapshot will be skipped (on the assumption that
# nothing can have changed).
#
# The script uses btrfs generation numbers to determine if the data
# on the subvolume has changed since a snapshot was taken.
#
# When running, the script will check for a sleeping disk and will
# attempt to spin down the disks after doing any snapshotting, just
# in case they spun up for snapshot activity only.
#
# How many snapshots of each type to keep.
# Each limit can be overridden from the environment (for example
# MAX_HOURLY=24); the default of 10 applies when the variable is unset
# or empty.
MAX_HOURLY=${MAX_HOURLY:-10}
MAX_DAILY=${MAX_DAILY:-10}
MAX_MONTHLY=${MAX_MONTHLY:-10}
MAX_YEARLY=${MAX_YEARLY:-10}
#
# ##############################################################
#
# The script manages the content of the .snapshots subvolume, which
# should not be changed from outside of the script.
# Within the .snapshots subvolume, individual snapshots are created
# with names of the form:
# yyyy-mm-dd_hh-mm-ss_<type>
# where <type> is one of [hourly|daily|monthly|yearly]
#
#
# FUNCTIONS START
#
# Print the usage text and terminate the script.
# Arguments: none (the script name is derived from $0)
# Outputs:   usage text on stdout
# Returns:   never returns; exits with status 1
show_help() {
  # Strip the directory part without word-splitting a path that
  # contains spaces, and keep the name out of the global scope.
  local me=${0##*/}
  echo
  echo "Usage: $me <data subvol>"
  echo
  echo " e.g. $me /mnt/btrfs-drive/data/mydata"
  echo
  echo " Snapshots will be stored in .snapshots under mydata"
  echo
  echo
  exit 1
}
# Find the btrfs mount hosting the source subvolume.
# Globals:   SOURCE (read), MOUNTPOINT (written; empty when nothing matches)
# Arguments: none
# Outputs:   nothing
find_mount() {
  local candidate
  MOUNTPOINT=""
  # Only consider lines of the form "<dev> on <dir> type btrfs ...", so a
  # device or directory that merely contains "btrfs" in its name is ignored.
  while IFS= read -r candidate; do
    [[ -n "$candidate" ]] || continue
    # Match on a path-component boundary: /mnt/dat must not claim
    # /mnt/data2/sub. "${candidate%/}/" also makes "/" match everything.
    if [[ "$SOURCE" == "$candidate" || "$SOURCE" == "${candidate%/}/"* ]]; then
      # With nested mounts the deepest (longest) mountpoint is the host.
      if (( ${#candidate} > ${#MOUNTPOINT} )); then
        MOUNTPOINT=$candidate
      fi
    fi
  done < <(mount | awk '$2 == "on" && $4 == "type" && $5 == "btrfs" { print $3 }')
}
# List the block devices that make up a btrfs filesystem.
# Arguments: $1 - mountpoint (or device) passed to "btrfs filesystem show"
# Outputs:   one device path per line on stdout
find_devices() {
  # Select only real "devid ..." rows; a label containing "devid" must not
  # produce a bogus entry. The device path is the 8th field of such a row.
  btrfs filesystem show "$1" | awk '$1 == "devid" { print $8 }'
}
# Report the power state of a drive as printed by "hdparm -C".
# Arguments: $1 - block device to query
# Outputs:   the 4th word of the "drive state is: <state>" line on stdout
hdd_status() {
  # Match the state line itself rather than any line containing "state",
  # so a device path with "state" in it cannot add a spurious line.
  hdparm -C "$1" | awk '$1 == "drive" && $2 == "state" { print $4 }'
}
# List the snapshots of one type that exist under $SOURCE/.snapshots.
# Globals:   SOURCE (read)
# Arguments: $1 - snapshot type (hourly|daily|monthly|yearly)
# Outputs:   snapshot names (basename only), sorted, one per line
list_snapshots_of_type() {
  local snap_type=$1
  # The type is matched as the "_<type>" suffix of the snapshot's own name.
  # Matching anywhere in the listed path would misclassify every snapshot
  # when a parent directory happens to contain the type word (for example
  # a source subvolume called "daily-data").
  btrfs subvolume list -o "$SOURCE/.snapshots" \
    | awk -v suffix="_${snap_type}" '
        {
          n = split($NF, parts, "/")
          name = parts[n]
          start = length(name) - length(suffix) + 1
          if (start >= 1 && substr(name, start) == suffix) {
            print name
          }
        }' \
    | sort
}
# Convert a snapshot name into a "yyyy-mm-dd hh:mm:ss" date string.
# Arguments: $1 - snapshot name of the form yyyy-mm-dd_hh-mm-ss_<type>
# Outputs:   "<date> <time>" on stdout (a single space for an empty name)
date_string_from_snapshot() {
  # All working variables are local so that callers' variables (notably
  # SNAPNAME, which handle_type also uses) are not overwritten.
  local snap_name=$1
  local date_part time_part remainder
  # Split on "_": first field is the date, second the time.
  IFS=_ read -r date_part time_part remainder <<< "$snap_name"
  printf '%s %s\n' "$date_part" "${time_part//-/:}"
}
# Print the newest snapshot of a type (last entry of the sorted list).
# Arguments: $1 - snapshot type
# Outputs:   snapshot name on stdout, or nothing when none exist
latest_snapshot_of_type() {
  list_snapshots_of_type "$1" | tail -n1
}
# Print the oldest snapshot of a type (first entry of the sorted list).
# Arguments: $1 - snapshot type
# Outputs:   snapshot name on stdout, or nothing when none exist
oldest_snapshot_of_type() {
  list_snapshots_of_type "$1" | head -n1
}
# Check whether a snapshot of the given type is due, i.e. whether the
# newest snapshot of that type is older than one interval of that type.
# Arguments: $1 - snapshot type (hourly|daily|monthly|yearly)
# Outputs:   "Yes" or "No" on stdout; diagnostics on stderr
# Returns:   1 if a snapshot is due, 0 if not, 2 for an unknown type or
#            when the cut-off time cannot be computed
is_snapshot_due() {
  local snap_type=$1
  local interval latest_snap snap_date cutoff_epoch snap_epoch

  case "$snap_type" in
    hourly)  interval="1 hour" ;;
    daily)   interval="1 day" ;;
    monthly) interval="1 month" ;;
    yearly)  interval="1 year" ;;
    *)
      # Do not go on to compare dates for a type we do not understand.
      echo "Error: Unknown type '$snap_type'" >&2
      return 2
      ;;
  esac

  latest_snap=$(latest_snapshot_of_type "$snap_type")
  if [[ -z "$latest_snap" ]]; then
    # No snapshot of this type exists yet, so one is due by definition.
    echo "Yes"
    return 1
  fi

  # Compare epoch seconds directly instead of mtimes of temporary files;
  # nothing is left behind in /tmp if the script is interrupted.
  cutoff_epoch=$(date -d "now - $interval" +%s) || return 2
  snap_date=$(date_string_from_snapshot "$latest_snap")
  if ! snap_epoch=$(date -d "$snap_date" +%s 2>/dev/null); then
    # A name that does not parse as a date is reported and treated as
    # "not due" rather than triggering a snapshot on every run.
    echo "Warning: cannot parse date from snapshot '$latest_snap'" >&2
    echo "No"
    return 0
  fi

  # Due when the newest snapshot is older than the cut-off time.
  if (( cutoff_epoch > snap_epoch )); then
    echo "Yes"
    return 1
  fi
  echo "No"
  return 0
}
# Report on, create and prune the snapshots of one type.
# Globals:   SOURCE (read), CURRENT_GENERATION (read)
# Arguments: $1 - snapshot type (hourly|daily|monthly|yearly)
#            $2 - maximum number of snapshots of this type to keep
# Outputs:   a status report and any actions taken, on stdout
handle_type() {
  local snap_type=$1
  local max_count=$2
  local count latest latest_date latest_generation oldest oldest_date
  local due needed new_snap due_rc

  count=$(list_snapshots_of_type "$snap_type" | wc -l)
  latest=$(latest_snapshot_of_type "$snap_type")
  latest_date=$(date_string_from_snapshot "$latest")
  latest_generation=""
  if [[ -n "$latest" ]]; then
    # Only query a generation when a snapshot exists; with an empty name
    # the path would point at the .snapshots subvolume itself.
    latest_generation=$(subvol_generation "$SOURCE/.snapshots/$latest")
  fi
  oldest=$(oldest_snapshot_of_type "$snap_type")
  oldest_date=$(date_string_from_snapshot "$oldest")

  # is_snapshot_due returns 1 (not 0) when a snapshot is due.
  due="No"
  due_rc=0
  is_snapshot_due "$snap_type" >/dev/null || due_rc=$?
  if (( due_rc == 1 )); then
    due="Yes"
  fi
  if (( count == 0 )); then
    due="Yes (no existing snapshot of this type)"
  fi

  # A snapshot is needed when the data changed since the newest snapshot,
  # and always when there is no snapshot to compare against. The string
  # comparison also copes with a generation that could not be read.
  needed="No"
  if (( count == 0 )) || [[ "$CURRENT_GENERATION" != "$latest_generation" ]]; then
    needed="Yes"
  fi

  echo " $snap_type:"
  echo " Number: $count / $max_count"
  echo " Oldest: $oldest_date ($oldest)"
  echo " Newest: $latest_date ($latest, Generation=$latest_generation)"
  echo " Due: $due"
  echo " Needed: $needed"

  # If the snapshot is due and is needed, make it regardless of whether
  # the drive is awake or not.
  if [[ "$due" != "No" && "$needed" == "Yes" ]]; then
    new_snap="$(date +%Y-%m-%d_%H-%M-%S)_${snap_type}"
    echo " Action: Create snapshot ($new_snap)"
    btrfs subvolume snapshot -r "$SOURCE" "$SOURCE/.snapshots/$new_snap"
  fi

  # Prune the oldest snapshot if the maximum count is exceeded. The oldest
  # name is looked up again here and must be non-empty, so the delete can
  # never be issued against the .snapshots subvolume itself.
  count=$(list_snapshots_of_type "$snap_type" | wc -l)
  if (( count > max_count )); then
    oldest=$(oldest_snapshot_of_type "$snap_type")
    if [[ -n "$oldest" ]]; then
      echo " Action: Prune oldest snapshot (max reached)"
      btrfs subvolume delete "$SOURCE/.snapshots/$oldest"
    fi
  fi
}
# Print the generation number of a btrfs subvolume.
# Arguments: $1 - path of the subvolume
# Outputs:   the value of the "Generation:" line of "btrfs subvolume show"
subvol_generation() {
  # Match the "Generation:" label exactly so that a subvolume whose path
  # or name contains the word "Generation" does not add extra output.
  btrfs subvolume show "$1" | awk '$1 == "Generation:" { print $2 }'
}
#
# SCRIPT START
#
# Show help text if needed (show_help exits the script).
if [[ $# -ne 1 || "$1" == "--help" ]]; then
  show_help
fi

SOURCE=$1
echo " Subvolume: $SOURCE"

# Sanity check - source needs to be a btrfs subvolume.
if ! btrfs subvol show "$SOURCE" >/dev/null 2>&1; then
  echo "Error: $SOURCE is not a btrfs subvolume (or maybe you don't have permissions)" >&2
  exit 1
fi

# Does the .snapshots subvolume exist?
if ! btrfs subvol show "$SOURCE/.snapshots" >/dev/null 2>&1; then
  # Create the snapshots subvolume. This ensures that our work doesn't get
  # included in the snapshot, because subvolumes don't nest in snapshots.
  # This will have woken a sleeping drive.
  echo "Init: Create .snapshots subvolume"
  if ! btrfs subvolume create "$SOURCE/.snapshots"; then
    # Nothing below can work without somewhere to put the snapshots.
    echo "Error: Could not create $SOURCE/.snapshots" >&2
    exit 1
  fi
fi

# Find the mountpoint that contains the source (sets MOUNTPOINT).
find_mount
# Quoted test: an empty MOUNTPOINT must take this branch rather than
# produce a test syntax error and fall through.
if [[ -z "$MOUNTPOINT" ]]; then
  echo "Error: Could not find the block device the subvolume is on." >&2
  exit 2
fi
echo "btrfs mount: $MOUNTPOINT"

# Check the wake/sleep status of the drive, using the last listed device
# of the filesystem as the reference.
REF_DEV=$(find_devices "$MOUNTPOINT" | tail -n1)
HDD_STATUS=$(hdd_status "$REF_DEV")
echo " HDD status: $HDD_STATUS"

# Get the current generation of the subvolume being snapshotted.
CURRENT_GENERATION=$(subvol_generation "$SOURCE")

# Handle the different types of snapshot.
echo "Snapshots:"
handle_type "hourly" "$MAX_HOURLY"
handle_type "daily" "$MAX_DAILY"
handle_type "monthly" "$MAX_MONTHLY"
handle_type "yearly" "$MAX_YEARLY"

# Spin the drives back down if they were asleep before and have been
# spun up by snapshot activity.
HDD_NEW_STATUS=$(hdd_status "$REF_DEV")
if [[ "$HDD_STATUS" == "standby" && "$HDD_NEW_STATUS" != "standby" ]]; then
  echo "Action: Spin down disks"
  find_devices "$MOUNTPOINT" | xargs -L 1 hdparm -y
fi
exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment