Last active
October 13, 2024 00:53
-
-
Save yrro/d245b45d4ba7a097738bdb726a982573 to your computer and use it in GitHub Desktop.
Orchestrate backups via borgbackup
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
# Usage: 'backup' alone backs the system up.
#        'backup ARGS' runs borg with ARGS, after configuring the repository location, passphrase, etc.
#        e.g., 'backup list ::' lists the backups in the repository.
# Goals: run on RHEL 8's default Python interpreter (3.6) with no non-RHEL packages required.
# Non-goals: backing up multiple filesystems; any configurability other than modifying the values
#            set in code.
# /etc/systemd/system/backup.service | |
#[Service] | |
#Type=oneshot | |
#ExecStart=/usr/local/sbin/backup | |
#Environment=PYTHONFAULTHANDLER=1 | |
#TimeoutStartSec=2h | |
#IOSchedulingClass=idle | |
# /etc/systemd/system/backup.timer | |
#[Timer] | |
#OnCalendar=*-*-* 02:00:00 | |
#RandomizedDelaySec=1h | |
# | |
#[Install] | |
#WantedBy=timers.target | |
# cat /root/borg.env | |
#BORG_REPO=ssh://whatever.example.com/ | |
#BORG_RSH="ssh -i /root/.ssh/id_borgbackup -o BatchMode=yes" | |
#BORG_PASSPHRASE=... | |
#BORG_REMOTE_PATH=borg1 # needed for rsync.net | |
# /etc/prometheus/rules.d/backup.yml | |
#groups: | |
#- name: backup | |
# rules: | |
# - alert: BackupLastCompletionAge | |
# expr: (time() - backup_completion_time_seconds) > (1.5 * 86400) | |
# annotations: | |
# summary: "{{ $labels.instance }} is not backed up" | |
# description: "{{ $labels.instance }}'s last successful backup was {{ $value | humanizeDuration }} ago." | |
# labels: | |
# severity: warning | |
#... | |
## vim: ts=8 sts=2 sw=2 et | |
from contextlib import contextmanager | |
import ctypes | |
from datetime import datetime, timezone | |
import logging | |
import os | |
import signal | |
import subprocess | |
import sys | |
import tempfile | |
import dotenv | |
import libmount | |
from prometheus_client import CollectorRegistry, Gauge, write_to_textfile | |
from systemd.journal import JournalHandler | |
# Module-wide logger; output handlers are set up by configure_logging().
logger = logging.getLogger("backup")

# Paths left out of every archive, as (style, pattern) pairs. create_backup()
# renders each pair as "--exclude=STYLE:PATTERN". The patterns are relative
# because borg is run from inside the snapshot's mount point.
# NOTE(review): "sh" / "pf" are presumably borg's shell-style and
# full-path pattern selectors -- confirm against the borg documentation.
exclude_patterns = [
    ("sh", glob)
    for glob in (
        "tmp/*",
        "var/tmp/*",
        "var/cache/PackageKit/*",
        "var/cache/dnf/*",
        "srv/prometheus/*",
    )
] + [
    ("pf", "var/log/lastlog"),
]
# Direct libc bindings for the two namespace calls used by unshare().
# use_errno=True makes the C errno available through ctypes.get_errno().
libc = ctypes.CDLL("libc.so.6", use_errno=True)

CLONE_NEWNS = 0x00020000  # <sched.h>

# Prototypes: both calls take C ints and return a C int status.
libc.unshare.restype = ctypes.c_int
libc.unshare.argtypes = (ctypes.c_int,)
libc.setns.restype = ctypes.c_int
libc.setns.argtypes = (ctypes.c_int, ctypes.c_int)
# A dedicated registry, so the text file written by record_success() holds
# nothing but the gauge registered below.
prom_reg = CollectorRegistry()
prom_status = Gauge(
    "backup_completion_time_seconds",
    "",  # no help text
    registry=prom_reg,
)
def main(argv):
    """Run a full backup cycle, or pass the given arguments through to borg.

    argv is the complete command line, program name included (argv[0] is
    ignored). The caller's list is left unmodified.

    With extra arguments, borg is run once with them and its exit status is
    returned. Without any, backup(), prune() and compact() are run in that
    order and the worst of their exit statuses is returned; success is
    recorded for monitoring only when none of them did worse than status 1
    (a borg warning).

    A process killed by a signal is reported as 128 + signal number, so it
    can never be mistaken for a success.
    """
    borg_args = list(argv[1:])  # ignore argv[0]
    if borg_args:
        return _exit_status(run_borg(borg_args))

    statuses = [_exit_status(step()) for step in (backup, prune, compact)]
    exit_status = max(statuses)
    if exit_status <= 1:  # ignore warnings
        record_success()
    return exit_status


def _exit_status(process):
    """Return a non-negative exit status for a finished process.

    Popen reports termination by signal N as returncode -N. Left as-is, a
    negative value would sort below 0 in max() and pass the "<= 1" success
    check, so it is mapped to the shell convention 128 + N instead.
    """
    returncode = process.returncode
    if returncode < 0:
        return 128 - returncode
    return returncode
def backup():
    """Archive a read-only snapshot of the root logical volume.

    Takes an LVM snapshot of xoanon/root, mounts it read-only on a temporary
    directory inside a private mount namespace, and runs create_backup() on
    that directory. The context managers undo every step in reverse order on
    the way out.

    Returns whatever create_backup() returns (the finished borg process).
    """
    snapshot_device = "/dev/xoanon/root-snapshot"
    with lv_snapshot("xoanon", "root-snapshot", "root"), \
            tempfile.TemporaryDirectory(prefix="backup-") as mnt, \
            unshare(), \
            mount2(snapshot_device, mnt, options=["ro", "nouuid"]):
        return create_backup(mnt)
def record_success():
    """Stamp the completion gauge with the current time and write it out.

    The registry is written to a .prom text file under /srv/node-exporter.
    """
    textfile_path = "/srv/node-exporter/backup.prom"
    prom_status.set_to_current_time()
    write_to_textfile(textfile_path, prom_reg)
@contextmanager
def lv_snapshot(vg, lv_snapshot, lv_origin):
    """Context manager holding a 12G LVM snapshot for the enclosed block.

    vg is the volume group, lv_origin the logical volume to snapshot and
    lv_snapshot the name given to the snapshot. The snapshot is removed on
    exit, whether or not the enclosed block raised.

    Raises subprocess.CalledProcessError if lvcreate or lvremove fails.
    """
    origin_path = f"{vg}/{lv_origin}"
    snapshot_path = f"{vg}/{lv_snapshot}"
    create_cmd = ["lvcreate", "--snapshot", "-n", lv_snapshot, "-L", "12G", origin_path]
    remove_cmd = ["lvremove", "-y", snapshot_path]

    subprocess.run(create_cmd, check=True)
    try:
        yield
    finally:
        subprocess.run(remove_cmd, check=True)
@contextmanager
def unshare():
    """Run the enclosed block in a new mount namespace, then switch back.

    A handle on the current mount namespace is opened first so it can be
    re-entered with setns() afterwards. Once unshare() has succeeded, every
    later step, including marking "/" private, runs under the finally clause
    that restores the original namespace, so a failure there no longer
    leaves the process stranded in the new namespace.

    Raises:
        OSError: if unshare() or setns() returns non-zero (errno attached).
        subprocess.CalledProcessError: if "mount --make-private /" fails.
    """
    with open(f"/proc/{os.getpid()}/ns/mnt", "rb") as mnt_ns:
        if libc.unshare(CLONE_NEWNS) != 0:
            raise OSError(ctypes.get_errno(), "Could not unshare mount namespace")
        try:
            try:
                subprocess.run(["mount", "--make-private", "/"], check=True)
            except Exception:
                # Only genuine failures are logged here; SystemExit and
                # KeyboardInterrupt pass through without this message.
                logger.error("Could not make mount namespace private")
                raise
            yield
        finally:
            if libc.setns(mnt_ns.fileno(), CLONE_NEWNS) != 0:
                raise OSError(ctypes.get_errno(), "Could not restore mount namespace")
@contextmanager
def mount(source, target, options=()):
    """Mount source on target with mount(8) for the enclosed block.

    options is an iterable of mount option strings; they are joined with
    commas and passed via "-o". When it is empty, "-o" is left out entirely
    rather than passing an empty option string. The default is an immutable
    tuple, so it cannot be shared and modified across calls.

    The target is unmounted on exit, whether or not the block raised.

    Raises:
        Exception: if target is already a mount point.
        subprocess.CalledProcessError: if mount or umount fails.
    """
    if os.path.ismount(target):
        raise Exception(f"{target} is already a mount point, refusing to mount on top")

    command = ["mount"]
    if options:
        command += ["-o", ",".join(options)]
    command += [source, target]

    subprocess.run(command, check=True)
    try:
        yield
    finally:
        subprocess.run(["umount", target], check=True)
@contextmanager
def mount2(source, target, options=()):
    """Mount source on target through the libmount bindings.

    Same contract as mount(), but uses a libmount.Context instead of
    spawning mount(8). options is an iterable of mount option strings; the
    context's option string is set only when at least one is given. The
    default is an immutable tuple, so it cannot be shared and modified
    across calls.

    The target is unmounted on exit, whether or not the block raised.

    Raises:
        Exception: if target is already a mount point.
    Errors from the libmount context's mount()/umount() propagate unchanged.
    """
    if os.path.ismount(target):
        raise Exception(f"{target} is already a mount point, refusing to mount on top")

    c = libmount.Context()
    c.source = source
    c.target = target
    if options:
        c.options = ",".join(options)

    c.mount()
    try:
        yield
    finally:
        c.umount()
def create_backup(path):
    """Run "borg create" on the tree rooted at path.

    borg is started with path as its working directory and "." as the
    directory to archive. One --exclude argument is added for every entry
    of exclude_patterns, and --progress is added only when stdin is a
    terminal.

    Returns the finished borg process, as returned by run_borg().
    """
    logger.info("Creating backup...")

    borg_args = ["create", "-v", "--stats"]
    # NOTE(review): the terminal check is made on stdin -- confirm this is
    # the stream intended for deciding whether to show progress.
    if os.isatty(sys.stdin.fileno()):
        borg_args.append("--progress")
    borg_args += [
        "--exclude-caches",
        "--keep-exclude-tags",
        "--compression=auto,lz4",
    ]
    for prefix, pattern in exclude_patterns:
        borg_args.append(f"--exclude={prefix}:{pattern}")
    # A plain string, not an f-string: the braces are passed to borg as-is
    # (presumably expanded by borg itself).
    borg_args.append("::{hostname}@{utcnow}Z")
    borg_args.append(".")

    return run_borg(borg_args, cwd=path)
def prune():
    """Run "borg prune" on the repository with the fixed retention policy.

    Keeps 7 daily, 4 weekly, 5 monthly and 5 yearly archives.

    Returns the finished borg process, as returned by run_borg().
    """
    logger.info("Pruning old backups...")
    retention_flags = (
        "--keep-daily=7",
        "--keep-weekly=4",
        "--keep-monthly=5",
        "--keep-yearly=5",
    )
    return run_borg(["prune", "--stats", *retention_flags, "::"])
def compact():
    """Run "borg compact" on the repository.

    Returns the finished borg process, as returned by run_borg().
    """
    logger.info("Compacting repository...")
    borg_args = ["compact", "::"]
    return run_borg(borg_args)
def run_borg(args, cwd=None):
    """Run borg with args and wait for it to finish.

    args is any iterable of argument strings; "--show-rc" and "--iec" are
    appended to it and the caller's object is not modified. cwd, when given,
    is the working directory for borg. The child's environment is the
    current environment overlaid with the variables read from
    /root/borg.env.

    If waiting is interrupted by any exception (including SystemExit and
    KeyboardInterrupt), borg is sent SIGTERM and waited for before the
    exception is re-raised.

    Returns the finished subprocess.Popen object; the outcome is logged
    according to its returncode (0 success, 1 warnings, anything else
    failure).
    """
    local_args = [*args, "--show-rc", "--iec"]
    logger.debug("borg arguments: %r", args)

    env = os.environ.copy()
    file_env = dotenv.dotenv_values("/root/borg.env", verbose=True)
    # dotenv reports a name that has no value as None, but Popen only accepts
    # strings in env, so such entries are skipped instead of crashing.
    env.update({name: value for name, value in file_env.items() if value is not None})

    p = subprocess.Popen(["/usr/local/bin/borg-linux64", *local_args], cwd=cwd, env=env)
    try:
        p.wait()
    except BaseException:
        # subprocess.run sends SIGKILL but we want to give borg a chance to
        # exit cleanly.
        logger.info("Terminating borg process (id %s)...", p.pid)
        p.terminate()
        p.wait()
        raise

    if p.returncode == 0:
        logger.debug("borg succeeded")
    elif p.returncode == 1:
        logger.warning("borg succeeded with warnings")
    else:
        logger.error("borg failed when called with arguments: %r", args)
    return p
def handle_sigterm(signalnumber, frame):
    """Signal handler: log the signal's name, then exit with status 99.

    frame is unused; it is part of the handler signature.
    """
    signal_name = signal.Signals(signalnumber).name
    logger.error("Caught %s", signal_name)
    # Leave by raising SystemExit so that finally blocks get to clean up
    # running processes, mounts, snapshots, etc.
    raise SystemExit(99)
def configure_logging():
    """Configure root logging for the script.

    The level comes from the BACKUP_LOG_LEVEL environment variable
    (upper-cased, default "INFO"). When INVOCATION_ID is present in the
    environment (presumably meaning the script was started by systemd),
    records go to the journal under the identifier "backup"; otherwise
    logging.basicConfig's default handler is used. Warnings from the
    warnings module are routed into logging as well.
    """
    level = os.environ.get("BACKUP_LOG_LEVEL", "INFO").upper()

    handlers = None
    if "INVOCATION_ID" in os.environ:
        handlers = [JournalHandler(SYSLOG_IDENTIFIER="backup")]

    logging.basicConfig(level=level, handlers=handlers, format="%(message)s")
    logging.captureWarnings(True)
if __name__ == "__main__":
    configure_logging()
    # Route SIGTERM through handle_sigterm so cleanup code runs before exit.
    signal.signal(signal.SIGTERM, handle_sigterm)
    exit_status = main(sys.argv)
    sys.exit(exit_status)
# vim: ts=8 sts=4 sw=4 et |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment