Skip to content

Instantly share code, notes, and snippets.

@yrro
Last active October 13, 2024 00:53
Show Gist options
  • Save yrro/d245b45d4ba7a097738bdb726a982573 to your computer and use it in GitHub Desktop.
Save yrro/d245b45d4ba7a097738bdb726a982573 to your computer and use it in GitHub Desktop.
Orchestrate backups via borgbackup
#!/usr/bin/python3
# Usage: 'backup' alone will back the system up
# 'backup ARGS' will run borg with ARGS, configuring repository location, passphrase etc.
# e.g., 'backup list ::' will list backups in the repository
# Goals: run on RHEL 8's default Python interpreter (3.6) with no non-RHEL packages required
# Non-goals: backing up multiple filesystems, any configurability save modifying values set
# in code.
# /etc/systemd/system/backup.service
#[Service]
#Type=oneshot
#ExecStart=/usr/local/sbin/backup
#Environment=PYTHONFAULTHANDLER=1
#TimeoutStartSec=2h
#IOSchedulingClass=idle
# /etc/systemd/system/backup.timer
#[Timer]
#OnCalendar=*-*-* 02:00:00
#RandomizedDelaySec=1h
#
#[Install]
#WantedBy=timers.target
# cat /root/borg.env
#BORG_REPO=ssh://whatever.example.com/
#BORG_RSH="ssh -i /root/.ssh/id_borgbackup -o BatchMode=yes"
#BORG_PASSPHRASE=...
#BORG_REMOTE_PATH=borg1 # needed for rsync.net
# /etc/prometheus/rules.d/backup.yml
#groups:
#- name: backup
# rules:
# - alert: BackupLastCompletionAge
# expr: (time() - backup_completion_time_seconds) > (1.5 * 86400)
# annotations:
# summary: "{{ $labels.instance }} is not backed up"
# description: "{{ $labels.instance }}'s last successful backup was {{ $value | humanizeDuration }} ago."
# labels:
# severity: warning
#...
## vim: ts=8 sts=2 sw=2 et
from contextlib import contextmanager
import ctypes
from datetime import datetime, timezone
import logging
import os
import signal
import subprocess
import sys
import tempfile
import dotenv
import libmount
from prometheus_client import CollectorRegistry, Gauge, write_to_textfile
from systemd.journal import JournalHandler
# Module-wide logger; its handlers are installed by configure_logging().
logger = logging.getLogger("backup")
# (style, pattern) pairs. create_backup() renders each pair as a
# "--exclude=STYLE:PATTERN" argument for "borg create". The patterns carry no
# leading slash because the archive is created from "." with the snapshot
# mount point as the working directory.
# NOTE(review): "sh" and "pf" are presumably borg's shell-style and
# path-full-match pattern selectors -- confirm against the borg documentation.
exclude_patterns = [
("sh", "tmp/*"),
("sh", "var/tmp/*"),
("sh", "var/cache/PackageKit/*"),
("sh", "var/cache/dnf/*"),
("sh", "srv/prometheus/*"),
("pf", "var/log/lastlog"),
]
# Handle on the C library, used by unshare() below to call unshare(2) and
# setns(2) directly. use_errno=True makes ctypes keep a private copy of errno
# that ctypes.get_errno() can read after a failed call.
libc = ctypes.CDLL("libc.so.6", use_errno=True)
# Flag value passed to both unshare() and setns() to select the mount namespace.
CLONE_NEWNS = 0x00020000 # <sched.h>
# Declare the C prototypes so ctypes converts arguments and results as ints.
# The trailing comma makes argtypes a one-element tuple.
libc.unshare.argtypes = ctypes.c_int,
libc.unshare.restype = ctypes.c_int
libc.setns.argtypes = ctypes.c_int, ctypes.c_int
libc.setns.restype = ctypes.c_int
# Dedicated registry so that the text file written by record_success() holds
# only the metric registered here.
prom_reg = CollectorRegistry()
# Gauge that record_success() sets to the current time after a successful run.
# The second argument is the metric's help text, which is left empty.
prom_status = Gauge("backup_completion_time_seconds", "", registry=prom_reg)
def _exit_status(returncode):
    """Convert a ``Popen.returncode`` into a non-negative exit status.

    ``subprocess`` reports a child killed by signal N as ``-N``. Such a value
    would sort below 0 ("success") in a ``max()`` comparison, so it is mapped
    to the shell convention ``128 + N`` to make it count as a failure.
    """
    if returncode < 0:
        return 128 - returncode
    return returncode


def main(argv):
    """Run an ad-hoc borg command, or the full backup/prune/compact cycle.

    Parameters:
        argv: the process argument vector, with the program name at index 0.
            The list is not modified.

    Returns:
        The exit status for the process. With extra arguments, this is the
        status of the single borg invocation. Without arguments, it is the
        worst status of the backup, prune and compact steps.

    Side effects:
        When the full cycle finishes with nothing worse than warnings
        (status <= 1), record_success() is called.
    """
    # Slice instead of pop(0) so the caller's list (sys.argv) stays intact.
    borg_args = list(argv[1:])
    if borg_args:
        return _exit_status(run_borg(borg_args).returncode)

    # All three steps always run, in this order, as before.
    results = (backup(), prune(), compact())
    exit_status = max(_exit_status(p.returncode) for p in results)
    if exit_status <= 1:  # ignore warnings
        record_success()
    return exit_status
def backup():
    """Back up a read-only snapshot of the root logical volume.

    Creates the "root-snapshot" snapshot of "xoanon/root", mounts it on a
    temporary directory from inside an unshared mount namespace, and runs
    create_backup() on that directory. The context managers are released in
    reverse order on the way out: unmount, restore the namespace, delete the
    temporary directory, remove the snapshot.

    Returns:
        Whatever create_backup() returns.
    """
    snapshot_device = "/dev/xoanon/root-snapshot"
    # One "with" statement with several managers behaves like the nested
    # form: they are entered left to right and exited right to left.
    with lv_snapshot("xoanon", "root-snapshot", "root"), \
            tempfile.TemporaryDirectory(prefix="backup-") as mountpoint, \
            unshare(), \
            mount2(snapshot_device, mountpoint, options=["ro", "nouuid"]):
        return create_backup(mountpoint)
def record_success():
    """Publish the time of the backup that has just completed.

    Sets the backup_completion_time_seconds gauge to the current time and
    writes the registry to a text file under /srv/node-exporter.
    """
    textfile_path = "/srv/node-exporter/backup.prom"
    prom_status.set_to_current_time()
    write_to_textfile(textfile_path, prom_reg)
@contextmanager
def lv_snapshot(vg, lv_snapshot, lv_origin):
    """Context manager that keeps an LVM snapshot alive for the "with" body.

    Parameters:
        vg: name of the volume group.
        lv_snapshot: name to give the snapshot logical volume.
        lv_origin: name of the logical volume to snapshot.

    Raises:
        subprocess.CalledProcessError: if lvcreate or lvremove exits non-zero.
    """
    # The snapshot is given a fixed 12G of space.
    create_cmd = ["lvcreate", "--snapshot", "-n", lv_snapshot, "-L", "12G", f"{vg}/{lv_origin}"]
    remove_cmd = ["lvremove", "-y", f"{vg}/{lv_snapshot}"]

    subprocess.run(create_cmd, check=True)
    try:
        yield
    finally:
        # Remove the snapshot even when the body raised.
        subprocess.run(remove_cmd, check=True)
@contextmanager
def unshare():
    """Run the "with" body in a new mount namespace, then switch back.

    A file handle on the current mount namespace is opened first so that it
    can be re-entered with setns() afterwards. After unshare(), "/" is marked
    private with "mount --make-private /".

    Raises:
        OSError: if unshare() or setns() fails (errno is taken from libc), or
            if the mount command cannot be executed.
        subprocess.CalledProcessError: if "mount --make-private /" exits
            non-zero.
    """
    with open(f"/proc/{os.getpid()}/ns/mnt", "rb") as mnt_ns:
        if libc.unshare(CLONE_NEWNS) != 0:
            raise OSError(ctypes.get_errno(), "Could not unshare mount namespace")
        # From here on the process is in the new namespace, so every exit
        # path, including a failing "mount --make-private", must switch back.
        try:
            try:
                subprocess.run(["mount", "--make-private", "/"], check=True)
            except (OSError, subprocess.SubprocessError):
                logger.error("Could not make mount namespace private")
                raise
            yield
        finally:
            if libc.setns(mnt_ns.fileno(), CLONE_NEWNS) != 0:
                raise OSError(ctypes.get_errno(), "Could not restore mount namespace")
@contextmanager
def mount(source, target, options=None):
    """Mount *source* on *target* with mount(8) for the "with" body.

    Parameters:
        source: device or other mount source passed to mount.
        target: directory to mount on. It must not already be a mount point.
        options: optional iterable of mount option strings. They are joined
            with commas and passed via "-o". When empty or None, "-o" is
            omitted.

    Raises:
        Exception: if *target* is already a mount point.
        subprocess.CalledProcessError: if mount or umount exits non-zero.
    """
    if os.path.ismount(target):
        raise Exception(f"{target} is already a mount point, refusing to mount on top")

    command = ["mount"]
    if options:
        command += ["-o", ",".join(options)]
    command += [source, target]

    subprocess.run(command, check=True)
    try:
        yield
    finally:
        # Unmount even when the body raised.
        subprocess.run(["umount", target], check=True)
@contextmanager
def mount2(source, target, options=None):
    """Mount *source* on *target* through libmount for the "with" body.

    Parameters:
        source: device or other mount source.
        target: directory to mount on. It must not already be a mount point.
        options: optional iterable of mount option strings, joined with
            commas. None is treated as no options.

    Raises:
        Exception: if *target* is already a mount point.
        Any exception raised by the libmount context's mount() or umount()
        calls propagates unchanged.
    """
    if os.path.ismount(target):
        raise Exception(f"{target} is already a mount point, refusing to mount on top")

    ctx = libmount.Context()
    ctx.source = source
    ctx.target = target
    # An empty option list still sets an empty options string, as before.
    ctx.options = ",".join(options or ())
    ctx.mount()
    try:
        yield
    finally:
        # Unmount even when the body raised.
        ctx.umount()
def create_backup(path):
    """Run "borg create" on the tree rooted at *path*.

    Parameters:
        path: directory used as borg's working directory. The archive is
            created from "." inside it.

    Returns:
        Whatever run_borg() returns for the "create" invocation.
    """
    logger.info("Creating backup...")

    borg_args = ["create", "-v", "--stats"]
    # Ask for progress output only when stdin is a terminal.
    if os.isatty(sys.stdin.fileno()):
        borg_args.append("--progress")
    borg_args += [
        "--exclude-caches",
        "--keep-exclude-tags",
        "--compression=auto,lz4",
    ]
    for style, pattern in exclude_patterns:
        borg_args.append(f"--exclude={style}:{pattern}")
    # The braces are passed through literally for borg to expand; this is
    # deliberately not an f-string.
    borg_args += ["::{hostname}@{utcnow}Z", "."]

    return run_borg(borg_args, cwd=path)
def prune():
    """Run "borg prune" with the fixed retention policy.

    Keeps 7 daily, 4 weekly, 5 monthly and 5 yearly archives.

    Returns:
        Whatever run_borg() returns for the "prune" invocation.
    """
    logger.info("Pruning old backups...")
    retention = [
        "--keep-daily=7",
        "--keep-weekly=4",
        "--keep-monthly=5",
        "--keep-yearly=5",
    ]
    return run_borg(["prune", "--stats", *retention, "::"])
def compact():
    """Run "borg compact" on the repository.

    Returns:
        Whatever run_borg() returns for the "compact" invocation.
    """
    logger.info("Compacting repository...")
    borg_args = ["compact", "::"]
    return run_borg(borg_args)
def run_borg(args, cwd=None):
    """Run the borg binary with *args* and wait for it to finish.

    The child's environment is the current environment overlaid with the
    variables read from /root/borg.env. "--show-rc" and "--iec" are appended
    to the given arguments.

    Parameters:
        args: iterable of borg command-line arguments. It is not modified.
        cwd: optional working directory for the borg process.

    Returns:
        The finished subprocess.Popen object; callers read its returncode.

    Raises:
        Any exception that interrupts the wait, including SystemExit raised
        by the SIGTERM handler, is re-raised after borg has been asked to
        terminate and has exited.
    """
    # Build a new list so any iterable is accepted and the caller's is untouched.
    local_args = [*args, "--show-rc", "--iec"]
    logger.debug("borg arguments: %r", local_args)

    env = os.environ.copy()
    file_values = dotenv.dotenv_values("/root/borg.env", verbose=True)
    # dotenv may map a key to None when the file lists it without a value.
    # Popen rejects None in env, so such entries are skipped.
    env.update({key: value for key, value in file_values.items() if value is not None})

    p = subprocess.Popen(["/usr/local/bin/borg-linux64", *local_args], cwd=cwd, env=env)
    try:
        p.wait()
    except BaseException:
        # subprocess.run sends SIGKILL but we want to give borg a chance to
        # exit cleanly.
        logger.info("Terminating borg process (id %s)...", p.pid)
        p.terminate()
        p.wait()
        raise

    if p.returncode == 0:
        logger.debug("borg succeeded")
    elif p.returncode == 1:
        logger.warning("borg succeeded with warnings")
    else:
        logger.error("borg failed when called with arguments: %r", args)
    return p
def handle_sigterm(signalnumber, frame):
    """Signal handler: log the signal's name and exit with status 99.

    Parameters:
        signalnumber: number of the signal that was delivered.
        frame: interrupted stack frame (unused).

    Raises:
        SystemExit: always, with code 99.
    """
    signal_name = signal.Signals(signalnumber).name
    logger.error("Caught %s", signal_name)
    # Raising SystemExit (what sys.exit does) unwinds the stack, so finally
    # blocks clean up running processes, mounts, snapshots, etc.
    raise SystemExit(99)
def configure_logging():
    """Configure the root logger for this script.

    The level name comes from the BACKUP_LOG_LEVEL environment variable
    (default "INFO", upper-cased). When INVOCATION_ID is present in the
    environment, records go to the journal through a JournalHandler;
    otherwise logging.basicConfig picks its default handler. Warnings from
    the warnings module are routed into logging as well.
    """
    # NOTE(review): INVOCATION_ID is presumably used to detect that systemd
    # started the process -- confirm.
    in_service = "INVOCATION_ID" in os.environ
    level_name = os.environ.get("BACKUP_LOG_LEVEL", "INFO").upper()
    log_handlers = [JournalHandler(SYSLOG_IDENTIFIER="backup")] if in_service else None

    logging.basicConfig(level=level_name, handlers=log_handlers, format="%(message)s")
    logging.captureWarnings(True)
if __name__ == "__main__":
    # Script entry point: set up logging, turn SIGTERM into SystemExit so
    # cleanup code runs, then exit with main()'s status.
    configure_logging()
    signal.signal(signal.SIGTERM, handle_sigterm)
    exit_code = main(sys.argv)
    sys.exit(exit_code)
# vim: ts=8 sts=4 sw=4 et
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment