Skip to content

Instantly share code, notes, and snippets.

@Techcable
Created January 14, 2021 23:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Techcable/718e432281ba2a4e7b1385eb69546e65 to your computer and use it in GitHub Desktop.
Save Techcable/718e432281ba2a4e7b1385eb69546e65 to your computer and use it in GitHub Desktop.
Utilities to print files that would be good to back up (in /etc, /var and /opt). Automatically ignores files owned by pacman. Remember to check modification with `pacman -Qii | grep -P '^(?:UN)?MODIFIED'` and `paccheck --sha256sum`
/etc/.pwd.lock
/etc/.updated
/etc/adjtime
/etc/ca-certificates/
/etc/dhcpcd.duid
/etc/group
/etc/group.pacnew
/etc/gshadow-
/etc/ld.so.cache
/etc/locale.gen.pacnew
/etc/localtime
/etc/machine-id
/etc/mkinitcpio.conf.pacnew
/etc/mkinitcpio.d
/etc/os-release
/etc/pacman.d/gnupg
/etc/pacman.d/mirrorlist.bak
/etc/pacman.d/mirrorlist.pacnew
/etc/passwd
/etc/passwd-
/etc/passwd.pacnew
/etc/profile.pacnew
/etc/shadow-
/etc/shadow.pacnew
/etc/shells.pacnew
/etc/ssh/ssh_host*
/etc/ssl
/etc/udev/hwdb.bin
/etc/xdg/lxlauncher
/opt/intellij-idea-ce
/opt/nexus
/usr/bin/lounge
/usr/bin/__pycache__
/usr/lib/gio/modules/giomodule.cache
/usr/lib/locale/locale-archive
/usr/lib/node_modules/
/usr/share/applications
/usr/share/fonts
/usr/share/glib-2.0
/usr/share/mime
/usr/share/nginx/solder
/var/cache/fontconfig
/var/cache/jenkins
/var/cache/ldconfig
/var/cache/man
/var/cache/pacman
/var/cache/private
/var/db/sudo
/var/lib/acme
/var/lib/colord
/var/lib/dbus
/var/lib/dhcpcd
/var/lib/jenkins
/var/lib/pacman
/var/lib/nginx
/var/lib/znc
/var/log
"""Prints files to backup, ignoring from a glob list and ones owned by pacman.
Remember to check backup files for modification with `pacman -Qii | grep -P '^(?:UN)?MODIFIED'`
Also check modified mtrees with `paccheck --sha256sum`
"""
from typing import Optional
import fnmatch
import os
import sys
from pathlib import Path
from dataclasses import dataclass, field
import click
import subprocess
import re
# Glob patterns read from ignore.txt, one per line; should_ignore() matches
# paths against this set.
IGNORED = set()
with open('ignore.txt') as f:
    for raw_line in f:
        pattern = raw_line.strip()
        # Blank lines and '#'-prefixed comment lines define no pattern.
        if pattern == '' or pattern.startswith('#'):
            continue
        # Store the pattern without its (single) trailing slash, if it has one.
        IGNORED.add(pattern[:-1] if pattern.endswith('/') else pattern)
def should_ignore(path: Path, counts=None) -> bool:
    """Report whether *path* matches any glob in ``IGNORED``.

    The path is passed through ``Path.resolve()`` and its string form is
    tested against each glob with ``fnmatch.fnmatch``. The search stops at
    the first glob that matches.

    If *counts* is given, the entry keyed by the matching glob is incremented
    by one; it is left untouched when nothing matches.
    """
    resolved = str(path.resolve())
    matched = next(
        (glob for glob in IGNORED if fnmatch.fnmatch(resolved, glob)),
        None,
    )
    if matched is None:
        return False
    if counts is not None:
        counts[matched] += 1
    return True
# Raw strings keep regex escapes such as \S away from Python's own
# string-escape processing (a plain "\S" is an invalid escape sequence).

# Matched against pacman's stderr lines; group 1 is the path that has no owner.
NO_OWNER_PATTERN = re.compile(r"^error: No package owns (.*)$")
# Matched against pacman's stdout lines; group 1 is the path, group 2 the
# owning package name, group 3 the rest of the line (presumably the package
# version - not used by this script).
PACMAN_OWNER_PATTERN = re.compile(r"^(.*) is owned by (\S+) (.*)$")
def check_pacman_owners(paths: set[Path]) -> dict[Path, Optional[str]]:
    """Ask pacman which package, if any, owns each of *paths*.

    All paths are passed to a single ``pacman -Qo`` invocation. Lines on
    stdout are parsed with ``PACMAN_OWNER_PATTERN`` (owned paths) and lines on
    stderr with ``NO_OWNER_PATTERN`` (unowned paths).

    Returns:
        A dict with exactly one entry per input path, mapping it to the
        owning package's name, or to ``None`` when no package owns it.
        An empty *paths* yields an empty dict without running pacman.

    Raises:
        subprocess.CalledProcessError: pacman's exit status is outside the
            range ``0..len(paths)``.
        AssertionError: pacman printed a line that matches neither pattern,
            mentioned an unknown or repeated path, or left a path unreported.
    """
    if not paths:
        # `pacman -Qo` without any file argument exits with an error, and with
        # len(paths) == 0 that status would be treated as fatal below.
        return {}
    results = {}
    proc = subprocess.run(
        ["pacman", "-Qo", *map(str, paths)],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        encoding='utf8',
    )
    # It's okay if some paths have no owner: a status between 0 and the number
    # of paths is tolerated. Anything else is handled as a real failure.
    if not 0 <= proc.returncode <= len(paths):
        proc.check_returncode()
    # stdout: one "<path> is owned by <package> <rest>" line per owned path
    for line in proc.stdout.strip().splitlines():
        match = PACMAN_OWNER_PATTERN.match(line)
        assert match is not None, f"Invalid line: {line!r}"
        p = Path(match[1])
        assert p in paths, f"Unknown path: {match[1]!r} for {paths!r}"
        assert p not in results
        results[p] = match[2]  # Package name
    # stderr: one "error: No package owns <path>" line per unowned path
    for line in proc.stderr.strip().splitlines():
        match = NO_OWNER_PATTERN.match(line)
        assert match is not None, f"Invalid error line: {line!r}"
        p = Path(match[1])
        assert p in paths, f"Unknown path: {match[1]!r} for {paths!r}"
        assert p not in results
        results[p] = None
    # Every requested path must have been reported exactly once
    assert results.keys() == paths
    return results
@dataclass
class IgnoreSet:
    """Buckets of paths collected while classifying a set of targets."""

    # Paths that matched an ignore glob
    ignored: set[Path] = field(default_factory=set)
    # Files that no pacman package owns
    kept: set[Path] = field(default_factory=set)
    # Directories that did not match an ignore glob
    kept_dirs: set[Path] = field(default_factory=set)
    # Files owned by a pacman package, mapped to that package's name
    pacman: dict[Path, str] = field(default_factory=dict)

    def print(self, level):
        """Write the entries selected by *level* to stdout, one per line.

        Levels:
            "ignored":  the ignored paths, sorted as ``Path`` objects.
            "kept":     the kept files, sorted as strings.
            "kept-any": the kept files and kept directories, sorted as strings.
            "any":      every recorded path, sorted as a string and followed
                        by its tag: ``IGNORED``, ``FILE``, ``DIR`` or
                        ``PACMAN <package>``. A path present in several
                        buckets gets the tag of the last one in that order.

        Raises:
            click.ClickException: *level* is none of the above.
        """
        if level == "ignored":
            for path in sorted(self.ignored):
                print(path)
            return

        if level == "kept" or level == "kept-any":
            names = [str(path) for path in self.kept]
            if level == "kept-any":
                names += [str(path) for path in self.kept_dirs]
            for name in sorted(names):
                print(name)
            return

        if level == "any":
            # Later updates overwrite earlier ones for the same path string
            tags = {str(path): ("IGNORED",) for path in self.ignored}
            tags.update((str(path), ("FILE",)) for path in self.kept)
            tags.update((str(path), ("DIR",)) for path in self.kept_dirs)
            tags.update(
                (str(path), ("PACMAN", pkg)) for path, pkg in self.pacman.items()
            )
            for name in sorted(tags):
                print(name, *tags[name])
            return

        raise click.ClickException(f"Invalid level: {level}")
def classify(targets, warn_unused=True) -> IgnoreSet:
    """Walk each directory tree in *targets* and sort its entries into buckets.

    Directories matching an ignore glob are recorded as ignored and not
    descended into; other directories are recorded as kept directories.
    Files matching an ignore glob are recorded as ignored; the remaining
    files are sent to pacman in batches and end up either in ``pacman``
    (owned by a package) or in ``kept`` (no owner).

    Args:
        targets: Iterable of directory paths to walk with ``os.walk``.
        warn_unused: When true (the default), print a warning to stderr for
            every ignore glob that matched nothing during the walk.

    Returns:
        The populated ``IgnoreSet``.
    """
    ignored_count = {glob: 0 for glob in IGNORED}
    res = IgnoreSet()
    pacman_queue = []
    max_queue_length = 50

    def drain_queue():
        """Drain the queue by checking pacman owners

        This is batched to ensure reasonable speed"""
        if not pacman_queue:
            # Nothing pending (e.g. every file was ignored, or the last batch
            # was flushed exactly at the limit): don't call pacman with no
            # file arguments, which it reports as an error.
            return
        # Basic sanity checks
        for p in pacman_queue:
            assert isinstance(p, Path), repr(p)
            assert p not in res.kept
            assert p not in res.ignored
            assert p not in res.pacman
        # Do the deed
        owners = check_pacman_owners(set(pacman_queue))
        # Make sure to preserve ordering
        for p in pacman_queue:
            pkg = owners[p]
            if pkg is None:
                res.kept.add(p)
            else:
                res.pacman[p] = pkg
        # Reset for further use
        pacman_queue.clear()

    for target in targets:
        print(f"Walking {target}", file=sys.stderr)
        for dirpath, subdirs, files in os.walk(target):
            surviving_subdirs = []
            for subdir_name in subdirs:
                subdir = Path(dirpath, subdir_name)
                if should_ignore(subdir, counts=ignored_count):
                    res.ignored.add(subdir)
                else:
                    res.kept_dirs.add(subdir)
                    surviving_subdirs.append(subdir_name)
            # Modify the list in place: os.walk (top-down) only descends into
            # the names still present in it.
            subdirs[:] = surviving_subdirs
            for file_name in files:
                file_path = Path(dirpath, file_name)
                if should_ignore(file_path, counts=ignored_count):
                    res.ignored.add(file_path)
                else:
                    pacman_queue.append(file_path)
                    if len(pacman_queue) >= max_queue_length:
                        drain_queue()
    # Flush whatever is left over from the last partial batch
    drain_queue()
    assert not pacman_queue
    if warn_unused:
        for glob, count in ignored_count.items():
            if count < 1:
                print(f"WARN: Unused glob: {glob!r}", file=sys.stderr)
    return res
@click.command()
@click.option(
    '--level',
    required=True,
    # Validate up front so a mistyped level is rejected before the (slow)
    # filesystem walk and pacman queries instead of after them.
    type=click.Choice(["ignored", "kept", "kept-any", "any"]),
    help="The level of info to print",
)
@click.argument('targets', nargs=-1)
def printDesiredFiles(level: str, targets: tuple[str, ...]):
    """Classify everything under TARGETS and print the entries selected by --level."""
    if not targets:
        raise click.ClickException("Must provide some targets to search")
    res = classify(targets)
    res.print(level)


# Only run the command line interface when executed as a script
if __name__ == "__main__":
    printDesiredFiles()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment