Skip to content

Instantly share code, notes, and snippets.

@mentha
Last active December 19, 2023 12:50
Show Gist options
  • Save mentha/22be943df3b4a855a25dacb1b7e93b3c to your computer and use it in GitHub Desktop.
Save mentha/22be943df3b4a855a25dacb1b7e93b3c to your computer and use it in GitHub Desktop.
btrfs dedupe and backup scripts
#!/usr/bin/env python3
from argparse import ArgumentParser
from contextlib import suppress
from datetime import datetime, timezone
from functools import cached_property
from sys import argv, stderr
from time import time
import json
import os
import re
import shlex
import subprocess as sp
import yaml
# Template written verbatim to <snapshot volume>/config.yaml by
# BtrfsBorg.init_vol. Every line of the template is a YAML comment; it
# documents the keys that BtrfsBorgConfig reads (subvolume, retention, borg).
CONFIG_DEFAULT = '''\
## btrfs borg configuration
#subvolume: subvolume # path to target subvolume
#retention:
# - span: 1d
# interval: 1h
# - span: 1w
# interval: 1d
# - span: 1M
# interval: 1w
#borg:
# - repo:
# name: # backup name
# compression: zstd
# options: # options as sh commandline, for borg-create and borg-list
# create-options:
# retention:
# - span: 1d
# interval: 4h
# - span: 1w
# interval: 1d
# - span: 6M
# interval: 1w
'''
class RetentionRule:
    """Time-based thinning policy built from ``span``/``interval`` pairs.

    Each rule says: "for objects no older than ``span``, keep at most one
    object per ``interval``".  Rules are stored sorted by span, so the
    narrowest span that still covers an object's age decides its interval.
    Objects older than every span are always reported as expired.
    """

    # Seconds per unit understood by parse_span.  Units are case-sensitive:
    # 'm' is minutes while 'M' is months.
    _UNITS = {
        'usec': 1e-6,
        'us': 1e-6,
        'μs': 1e-6,      # GREEK SMALL LETTER MU
        '\u00b5s': 1e-6,  # MICRO SIGN spelling of the same unit
        'msec': 1e-3,
        'ms': 1e-3,
        'seconds': 1,
        'second': 1,
        'sec': 1,
        's': 1,
        'minutes': 60,
        'minute': 60,
        'min': 60,
        'm': 60,
        'hours': 3600,
        'hour': 3600,
        'hr': 3600,
        'h': 3600,
        'days': 86400,
        'day': 86400,
        'd': 86400,
        'weeks': 86400 * 7,
        'week': 86400 * 7,
        'w': 86400 * 7,
        'months': 86400 * 365.25 / 12,
        'month': 86400 * 365.25 / 12,
        'M': 86400 * 365.25 / 12,
        'years': 86400 * 365.25,
        'year': 86400 * 365.25,
        'y': 86400 * 365.25,
    }

    # A token is either a number or a unit word.  Both mu spellings are part
    # of the unit alphabet; with an ASCII-only class the 'μs' unit could never
    # match and '5μs' was silently read as 5 seconds.
    _TOKEN = re.compile('[0-9.]+|[a-zA-Z\u00b5\u03bc]+')

    @staticmethod
    def parse_span(span):
        """Convert a duration such as ``'1h30m'`` or ``'2w'`` to seconds.

        Args:
            span: A string of ``<number><unit>`` pairs.  A trailing number
                without a unit is taken as seconds.  A plain int/float (as
                YAML produces for ``span: 3600``) is returned unchanged.

        Returns:
            The duration in seconds (``0`` for an empty string).

        Raises:
            ValueError: On a malformed number, an unknown unit, or a unit
                that is not preceded by a number.
        """
        if isinstance(span, (int, float)) and not isinstance(span, bool):
            return span
        total = 0
        pending = None  # number read but not yet multiplied by a unit
        for token in RetentionRule._TOKEN.findall(span):
            if token[0].isalpha():
                if pending is None:
                    raise ValueError(
                        f'unit {token!r} has no preceding number in span {span!r}')
                try:
                    scale = RetentionRule._UNITS[token]
                except KeyError:
                    raise ValueError(
                        f'unknown unit {token!r} in span {span!r}') from None
                total += pending * scale
                pending = None
            else:
                pending = float(token)
        if pending is not None:
            # Bare trailing number: seconds.
            total += pending
        return total

    def __init__(self, rules):
        """Build the policy from an iterable of ``{'span', 'interval'}`` dicts."""
        self.rules = sorted(
            (self.parse_span(r['span']), self.parse_span(r['interval']))
            for r in rules
        )

    @staticmethod
    def interval_adjust(i):
        """Shrink intervals above 600 s by 10 %, but by at most one hour.

        NOTE(review): presumably slack so that a job firing slightly early
        still counts as "one interval later" -- confirm with the author.
        """
        if i > 600:
            i -= min(i / 10, 3600)
        return i

    def filter_expired(self, objs, key=float, now=None):
        """Yield the members of *objs* that the policy no longer retains.

        Args:
            objs: Objects to examine.
            key: Callable mapping an object to its POSIX timestamp.
            now: Reference time in seconds; defaults to ``time()``.

        Yields:
            Expired objects, oldest first.  Objects whose age is zero or
            negative (timestamp at or after *now*) are never yielded.
        """
        if now is None:
            now = time()
        last_kept = None
        for ts, obj in sorted((key(o), o) for o in objs):
            age = now - ts
            if age <= 0:
                continue
            # Narrowest span that still covers this age picks the interval.
            interval = next(
                (every for span, every in self.rules if span >= age), None)
            if interval is None:
                # Older than every configured span.
                yield obj
                continue
            if (last_kept is not None
                    and ts - last_kept < self.interval_adjust(interval)):
                # Too close to the previously retained object.
                yield obj
                continue
            last_kept = ts
class BtrfsBorgConfig:
    """Settings loaded from ``<snapvol>/config.yaml``.

    Attributes set by ``__init__``:
        subvolume: Absolute path of the subvolume to snapshot.
        retention: RetentionRule applied to local snapshots.
        borg: List of BorgRepo entries, one per configured repository.
    """

    @staticmethod
    def _value(mapping, key, default):
        """Return ``mapping[key]``, or *default* if it is missing or null.

        A YAML key written without a value (``options:``) loads as ``None``;
        ``dict.get`` would hand that ``None`` through instead of the default.
        An explicitly empty value such as ``[]`` is still returned as-is.
        """
        value = mapping.get(key)
        return default if value is None else value

    def __init__(self, snapvol):
        """Read and parse ``config.yaml`` inside *snapvol*.

        Raises:
            OSError: If the file cannot be opened.
            ValueError: If the document is not a mapping.
        """
        with open(os.path.join(snapvol, 'config.yaml'), 'rb') as f:
            d = yaml.safe_load(f)
        if d is None:
            # Empty or comment-only file (e.g. the untouched default
            # template): run entirely on built-in defaults.
            d = {}
        if not isinstance(d, dict):
            raise ValueError('config.yaml must contain a mapping at top level')
        self.subvolume = self._parse_subvolume(
            self._value(d, 'subvolume', 'subvolume'), snapvol)
        self.retention = RetentionRule(self._value(d, 'retention', [
            {'span': '1d', 'interval': '1h'},
            {'span': '1w', 'interval': '1d'},
            {'span': '1M', 'interval': '1w'},
        ]))
        self.borg = [self.BorgRepo(c) for c in self._value(d, 'borg', [])]

    @staticmethod
    def _parse_subvolume(v, snapvol):
        """Resolve *v* to a path; relative values are relative to *snapvol*.

        Absolute paths are returned untouched; relative ones are joined to
        *snapvol* and passed through ``os.path.realpath``.
        """
        if os.path.isabs(v):
            return v
        return os.path.realpath(os.path.join(snapvol, v))

    class BorgRepo:
        """One entry of the ``borg:`` list."""

        def __init__(self, r):
            """Populate from mapping *r*; ``repo`` and ``name`` are required.

            Raises:
                KeyError: If ``repo`` or ``name`` is missing.
            """
            value = BtrfsBorgConfig._value
            self.repo = r['repo']
            self.name = r['name']
            self.compression = value(r, 'compression', 'zstd')
            # Option strings are split like a shell command line.
            self.options = shlex.split(value(r, 'options', ''))
            self.create_options = shlex.split(value(r, 'create-options', ''))
            self.retention = RetentionRule(value(r, 'retention', [
                {'span': '1d', 'interval': '4h'},
                {'span': '1w', 'interval': '1d'},
                {'span': '6M', 'interval': '1w'},
            ]))
class FileLock:
    """Context manager holding an exclusive ``lockf`` lock on a file.

    The file is opened (and created if absent) in the constructor.  Entering
    the context takes the lock without blocking; if it cannot be taken the
    file is closed and the ``OSError`` propagates.  Leaving the context
    closes the file, which drops the lock.
    """

    def __init__(self, path):
        # Append mode: creates the file when missing, never truncates it.
        self.f = open(path, 'ab')

    def __enter__(self):
        descriptor = self.f.fileno()
        try:
            # F_TLOCK = test-and-lock: fail at once rather than wait.
            os.lockf(descriptor, os.F_TLOCK, 0)
        except OSError:
            self.f.close()
            raise
        return self

    def __exit__(self, *a):
        self.f.close()
class BtrfsBorg:
    """Take read-only btrfs snapshots of a subvolume and archive them with borg.

    A "snapshot volume" is a directory holding ``config.yaml``, a ``lock``
    file and a ``snapshots/`` directory whose entries are named by the ISO
    timestamp of the snapshot.
    """

    @staticmethod
    def unshared_main(target_path, src_path, *cmd):
        """Recursively bind-mount *src_path* on *target_path*, then exec *cmd*.

        Entered through the ``--unshared`` argument.  run_borg starts it
        under ``unshare -m`` so that borg reads the snapshot at the live
        subvolume's path.  Does not return when the exec succeeds.
        """
        sp.run(['mount', '--rbind', src_path, target_path], check=True)
        os.execvp(cmd[0], cmd)

    @staticmethod
    def init_vol(subvol, snapvol):
        """Create snapshot volume *snapvol* for backing up *subvol*.

        Creates the btrfs subvolume, its ``snapshots`` directory, a
        ``subvolume`` symlink pointing at *subvol* (relative when *subvol*
        is relative) and a ``config.yaml`` holding CONFIG_DEFAULT.
        """
        sp.run(['btrfs', 'subvolume', 'create', snapvol], check=True)
        os.mkdir(os.path.join(snapvol, 'snapshots'))
        if not os.path.isabs(subvol):
            subvol = os.path.relpath(subvol, snapvol)
        os.symlink(subvol, os.path.join(snapvol, 'subvolume'))
        with open(os.path.join(snapvol, 'config.yaml'), 'w', encoding='utf-8') as f:
            f.write(CONFIG_DEFAULT)

    @staticmethod
    def eprint(*a):
        """Print *a* to stderr and flush immediately."""
        print(*a, file=stderr)
        stderr.flush()

    @classmethod
    def run_backup(cls, vols, dry):
        """Back up every snapshot volume in *vols*, then compact touched repos.

        A failing volume does not stop the remaining ones; its error is
        reported on stderr.

        Args:
            vols: Paths of snapshot volumes.
            dry: When true, only report what would be backed up.

        Raises:
            SystemExit: With status 1 if any volume failed.
        """
        failed = False
        compact_repos = set()
        for vol in sorted(vols):
            cls.eprint(f'Backing up {vol}')
            succeed = False
            try:
                if dry:
                    cls.eprint(f'would back up {vol}')
                else:
                    cls(vol, compact_repos).do_backup()
                succeed = True
            except Exception as e:
                # Keep going with the other volumes, but say what went wrong.
                cls.eprint(f'Error backing up {vol}: {type(e).__name__}: {e}')
            cls.eprint(f'Backing up of {vol} ' + ('succeeded' if succeed else 'failed'))
            if not succeed:
                failed = True
        for repo in sorted(compact_repos):
            cls.eprint(f'Compacting repo {repo}')
            sp.run(['borg', 'compact', repo], check=True)
        if failed:
            raise SystemExit(1)

    @classmethod
    def main(cls, args=None):
        """Command-line entry point.

        Args:
            args: Argument list without the program name; defaults to
                ``argv[1:]``.
        """
        args = argv[1:] if args is None else list(args)
        if args and args[0] == '--unshared':
            # Internal re-exec from run_borg inside a private mount namespace.
            return cls.unshared_main(*args[1:])
        parser = ArgumentParser(description='Btrfs backup tool')
        parser.add_argument('--init', metavar='SUBVOLUME', help='initialize new snapshot path')
        parser.add_argument('--dry', action='store_true', help='dry run')
        parser.add_argument('snapshot_vol', nargs='+', help='snapshot path or directory of snapshot vols')
        opts = parser.parse_args(args)
        if opts.init:
            if len(opts.snapshot_vol) != 1:
                raise RuntimeError('only one volume could be initialized')
            # Bound before the branch: both the dry and the real path need it.
            vol = opts.snapshot_vol[0]
            if opts.dry:
                cls.eprint(f'would initialize {vol}')
            else:
                cls.init_vol(opts.init, vol)
        else:
            vols = []
            for v in opts.snapshot_vol:
                if os.path.exists(os.path.join(v, 'config.yaml')):
                    # v is itself a snapshot volume.
                    vols.append(os.path.realpath(v))
                else:
                    # Otherwise treat v as a directory of snapshot volumes.
                    for e in os.listdir(v):
                        p = os.path.realpath(os.path.join(v, e))
                        if os.path.isdir(p):
                            vols.append(p)
            cls.run_backup(vols, opts.dry)

    def __init__(self, vol, compact_repo=None):
        """Bind to snapshot volume *vol*.

        Args:
            vol: Path of the snapshot volume.
            compact_repo: Set that collects repositories to compact later;
                a private set is used when omitted.
        """
        self.snapvol = vol
        if compact_repo is None:
            compact_repo = set()
        self.compact_repo = compact_repo

    @cached_property
    def config(self):
        """BtrfsBorgConfig for this volume, loaded on first access."""
        return BtrfsBorgConfig(self.snapvol)

    @staticmethod
    def parse_ts(s):
        """Parse an ISO-8601 string into a POSIX timestamp."""
        return datetime.fromisoformat(s).timestamp()

    @staticmethod
    def format_ts(ts):
        """Format POSIX timestamp *ts* as an ISO-8601 string in UTC."""
        return datetime.fromtimestamp(ts, timezone.utc).isoformat()

    def do_backup(self):
        """Snapshot the subvolume, prune old snapshots and feed borg.

        Runs under the volume's lock file.

        Raises:
            RuntimeError: If repositories are configured and every one failed.
        """
        with FileLock(os.path.join(self.snapvol, 'lock')):
            snapdir = os.path.join(self.snapvol, 'snapshots')
            newvol = os.path.join(snapdir, '.new')
            if os.path.exists(newvol):
                # Leftover from an interrupted run; removal is best-effort.
                sp.run(['btrfs', 'subvolume', 'delete', newvol], stdout=sp.DEVNULL, stderr=sp.DEVNULL, check=False)
            ts = time()
            sp.run(['btrfs', 'subvolume', 'snapshot', '-r', self.config.subvolume, newvol], stdout=sp.DEVNULL, check=True)
            # Dot-entries (such as '.new') are not timestamped snapshots.
            names = [n for n in os.listdir(snapdir) if not n.startswith('.')]
            for s in self.config.retention.filter_expired(names, key=self.parse_ts, now=ts):
                sp.run(['btrfs', 'subvolume', 'delete', os.path.join(snapdir, s)], check=True)
            curvol = os.path.join(snapdir, self.format_ts(ts))
            os.rename(newvol, curvol)
            borgcount = 0
            borgfail = 0
            for b in self.config.borg:
                borgcount += 1
                try:
                    self.run_borg(b, ts, curvol)
                except Exception as e:
                    # One broken repository must not block the others.
                    borgfail += 1
                    self.eprint(f'borg backup to {b.repo} failed: {type(e).__name__}: {e}')
            if borgcount > 0 and borgfail >= borgcount:
                raise RuntimeError('all backups to borg failed')

    def run_borg(self, conf, ts, path):
        """Archive snapshot *path* into one repository and prune that repo.

        Args:
            conf: BorgRepo settings.
            ts: POSIX timestamp of the snapshot.
            path: Path of the snapshot.

        Raises:
            RuntimeError: If an archive with the new name already exists.
            subprocess.CalledProcessError: If a borg/unshare command fails.
        """
        prefix = conf.name + '-'
        listing = sp.run(
            ['borg', 'list', '--json'] + conf.options + [conf.repo],
            stdout=sp.PIPE, check=True).stdout
        arcs = [a['archive'] for a in json.loads(listing)['archives']
                if a['archive'].startswith(prefix)]
        newarc = prefix + self.format_ts(ts)
        if newarc in arcs:
            raise RuntimeError(f'archive {newarc} already exist')
        arcs.append(newarc)

        def arc_ts(name):
            return self.parse_ts(name[len(prefix):])

        # The archive name carries ts rounded to microseconds, so compared
        # with the raw ts its age could fall on either side of zero, and
        # filter_expired skips objects whose age is not positive.  Use a
        # reference time strictly after the new archive so that the interval
        # rule is applied to it on every run.
        now = max(ts, arc_ts(newarc) + 1e-6)
        expired = list(conf.retention.filter_expired(arcs, key=arc_ts, now=now))
        if newarc not in expired:
            create = ['borg', 'create', '--checkpoint-interval', '600']
            compression = getattr(conf, 'compression', None)
            if compression:
                # Placed before the user's options so those can override it.
                create += ['--compression', str(compression)]
            sp.run(['unshare', '-m', '--propagation', 'private', argv[0], '--unshared', self.config.subvolume, path] +
                   create + conf.options + conf.create_options +
                   [conf.repo + '::' + newarc, self.config.subvolume], stdin=sp.DEVNULL, check=True)
        for n in expired:
            if n == newarc:
                # Never created, so there is nothing to delete.
                continue
            sp.run(['borg', 'delete'] + conf.options + [conf.repo + '::' + n], check=True)
        self.compact_repo.add(conf.repo)
# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    BtrfsBorg.main()
# NOTE(review): two unit files appear back to back here without file names;
# by the PKGBUILD source order these are presumably btrfsborg.service
# followed by btrfsborg.timer -- confirm.
# Service: back up the snapshot volumes found under /etc/btrfsborg.
[Service]
ExecStart=/usr/bin/btrfsborg.py /etc/btrfsborg
# Lowest CPU and I/O priority for the backup process.
Nice=19
CPUSchedulingPolicy=idle
IOSchedulingClass=idle
# Timer: trigger once per hour.
[Timer]
OnCalendar=hourly
[Install]
WantedBy=timers.target
# NOTE(review): presumably the template units btrfsborg@.service followed by
# btrfsborg@.timer (inferred from the PKGBUILD source order) -- confirm.
# Service: %f is systemd's specifier for the unescaped instance name as a
# path, so each instance backs up the path it is named after.
[Service]
ExecStart=/usr/bin/btrfsborg.py %f
# Lowest CPU and I/O priority for the backup process.
Nice=19
CPUSchedulingPolicy=idle
IOSchedulingClass=idle
# Timer: trigger once per hour.
[Timer]
OnCalendar=hourly
[Install]
WantedBy=timers.target
# NOTE(review): presumably the template units duperemove@.service followed by
# duperemove@.timer (inferred from the PKGBUILD source order) -- confirm.
# Service: run duperemove on the path given by the instance name (%f).
[Service]
ExecStart=/usr/bin/duperemove --dedupe-options=same -hqdr %f
# Lowest CPU and I/O priority for the dedupe process.
Nice=19
CPUSchedulingPolicy=idle
IOSchedulingClass=idle
# Timer: trigger weekly; Persistent=true makes systemd catch up on a run
# that was missed while the timer was inactive.
[Timer]
OnCalendar=weekly
Persistent=true
[Install]
WantedBy=timers.target
# Arch Linux PKGBUILD metadata for the btrfsmaint package, which ships the
# btrfsborg.py script and the systemd units listed in source=().
pkgname=btrfsmaint
pkgver=0.20231218.0
pkgrel=1
pkgdesc='extra btrfs maintenance and backup tools'
arch=(any)
license=(Unlicense)
# Runtime dependencies: the external tools the script and units invoke,
# plus python-yaml for the script's `import yaml`.
depends=(
borg
btrfs-progs
duperemove
python
python-yaml
systemd
)
# All sources are local files kept next to this PKGBUILD.
source=(
btrfsborg.py
btrfsborg.service
btrfsborg.timer
btrfsborg@.service
btrfsborg@.timer
duperemove@.service
duperemove@.timer
)
# One entry per source, in the same order; SKIP disables the integrity check.
cksums=(
SKIP
SKIP
SKIP
SKIP
SKIP
SKIP
SKIP
)
# Install the script into /usr/bin and every unit file into the systemd
# system unit directory of the package root.
package() {
  local unit_dir="$pkgdir/usr/lib/systemd/system"
  local unit

  install -Dm755 -t "$pkgdir/usr/bin" "$srcdir/btrfsborg.py"

  for unit in \
    btrfsborg.service \
    btrfsborg.timer \
    btrfsborg@.service \
    btrfsborg@.timer \
    duperemove@.service \
    duperemove@.timer
  do
    install -Dm644 -t "$unit_dir" "$srcdir/$unit"
  done
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment