Created
January 14, 2021 23:15
-
-
Save Techcable/718e432281ba2a4e7b1385eb69546e65 to your computer and use it in GitHub Desktop.
Utilities to print files that would be good to back up (in /etc, /var, /opt). Automatically ignores files owned by pacman. Remember to check for modifications with `pacman -Qii | grep -P '^(?:UN)?MODIFIED'` and `paccheck --sha256sum`.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/etc/.pwd.lock | |
/etc/.updated | |
/etc/adjtime | |
/etc/ca-certificates/ | |
/etc/dhcpcd.duid | |
/etc/group | |
/etc/group.pacnew | |
/etc/gshadow- | |
/etc/ld.so.cache | |
/etc/locale.gen.pacnew | |
/etc/localtime | |
/etc/machine-id | |
/etc/mkinitcpio.conf.pacnew | |
/etc/mkinitcpio.d | |
/etc/os-release | |
/etc/pacman.d/gnupg | |
/etc/pacman.d/mirrorlist.bak | |
/etc/pacman.d/mirrorlist.pacnew | |
/etc/passwd | |
/etc/passwd- | |
/etc/passwd.pacnew | |
/etc/profile.pacnew | |
/etc/shadow- | |
/etc/shadow.pacnew | |
/etc/shells.pacnew | |
/etc/ssh/ssh_host* | |
/etc/ssl | |
/etc/udev/hwdb.bin | |
/etc/xdg/lxlauncher | |
/opt/intellij-idea-ce | |
/opt/nexus | |
/usr/bin/lounge | |
/usr/bin/__pycache__ | |
/usr/lib/gio/modules/giomodule.cache | |
/usr/lib/locale/locale-archive | |
/usr/lib/node_modules/ | |
/usr/share/applications | |
/usr/share/fonts | |
/usr/share/glib-2.0 | |
/usr/share/mime | |
/usr/share/nginx/solder | |
/var/cache/fontconfig | |
/var/cache/jenkins | |
/var/cache/ldconfig | |
/var/cache/man | |
/var/cache/pacman | |
/var/cache/private | |
/var/db/sudo | |
/var/lib/acme | |
/var/lib/colord | |
/var/lib/dbus | |
/var/lib/dhcpcd | |
/var/lib/jenkins | |
/var/lib/pacman | |
/var/lib/nginx | |
/var/lib/znc | |
/var/log |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Prints files to backup, ignoring from a glob list and ones owned by pacman. | |
Remember to check backup files for modification with `pacman -Qii | grep -P '^(?:UN)?MODIFIED'` | |
Also check modified mtrees with `paccheck --sha256sum` | |
""" | |
from typing import Optional | |
import fnmatch | |
import os | |
import sys | |
from pathlib import Path | |
from dataclasses import dataclass, field | |
import click | |
import subprocess | |
import re | |
# Glob patterns (matched with fnmatch) naming paths that must never be reported.
# Populated once, at import time, from 'ignore.txt' in the current directory.
IGNORED = set()
with open('ignore.txt') as ignore_file:
    for raw_line in ignore_file:
        pattern = raw_line.strip()
        # Blank lines and '#' comment lines carry no pattern.
        if not pattern or pattern.startswith('#'):
            continue
        # Drop one trailing slash: should_ignore() matches against str(Path),
        # which never ends in a slash.
        if pattern.endswith('/'):
            pattern = pattern[:-1]
        IGNORED.add(pattern)
def should_ignore(path: Path, counts=None) -> bool:
    """Report whether ``path`` matches any glob in ``IGNORED``.

    The path is resolved first and its string form is tested against each
    glob with ``fnmatch``. Only the first matching glob is considered.

    Args:
        path: The filesystem path to test.
        counts: Optional mapping of glob -> hit count; when given, the entry
            for the matching glob is incremented.

    Returns:
        True if some glob matched, False otherwise.
    """
    resolved = str(path.resolve())
    matched = next(
        (pattern for pattern in IGNORED if fnmatch.fnmatch(resolved, pattern)),
        None,
    )
    if matched is None:
        return False
    if counts is not None:
        counts[matched] += 1
    return True
# Raw strings are used so that regex escapes such as `\S` are not treated as
# (invalid) Python string escapes, which newer interpreters warn about.

# Matches the stderr line emitted for a path that no package owns.
# Group 1 is the path.
NO_OWNER_PATTERN = re.compile(r"^error: No package owns (.*)$")
# Matches the stdout line emitted for an owned path.
# Group 1 is the path, group 2 the package name, group 3 the remainder of the
# line (presumably the package version -- confirm against pacman's output).
PACMAN_OWNER_PATTERN = re.compile(r"^(.*) is owned by (\S+) (.*)$")
def check_pacman_owners(paths: set[Path]) -> dict[Path, Optional[str]]:
    """Ask pacman which package (if any) owns each of ``paths``.

    Runs a single ``pacman -Qo`` for the whole batch and parses its output:
    stdout lines are matched with ``PACMAN_OWNER_PATTERN`` (owned paths) and
    stderr lines with ``NO_OWNER_PATTERN`` (unowned paths).

    Args:
        paths: The paths to query. May be empty.

    Returns:
        A dict with exactly one entry per input path, mapping it to the
        owning package name, or to ``None`` when no package owns it.

    Raises:
        subprocess.CalledProcessError: If pacman exits with a status outside
            the range ``0..len(paths)``.
        AssertionError: If pacman's output cannot be parsed or does not cover
            exactly the requested paths.
    """
    if not paths:
        # Nothing to ask. Invoking `pacman -Qo` without any target makes
        # pacman exit with an error, which would otherwise surface as a
        # CalledProcessError for a perfectly valid (empty) batch.
        return {}

    results = {}

    def record(raw_path: str, owner: Optional[str]) -> None:
        """Store ``owner`` for ``raw_path`` after sanity-checking the path."""
        p = Path(raw_path)
        assert p in paths, f"Unknown path: {raw_path!r} for {paths!r}"
        assert p not in results
        results[p] = owner

    proc = subprocess.run(
        ["pacman", "-Qo", *map(str, paths)],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        encoding='utf8',
    )
    # An exit status between 0 and len(paths) is tolerated: it's okay if some
    # paths have no owner. Anything else is a real failure.
    if not 0 <= proc.returncode <= len(paths):
        proc.check_returncode()

    # stdout: one line per path that is owned by a package.
    for line in proc.stdout.strip().splitlines():
        match = PACMAN_OWNER_PATTERN.match(line)
        assert match is not None, f"Invalid line: {line!r}"
        record(match[1], match[2])  # match[2] is the package name

    # stderr: one line per path that no package owns.
    for line in proc.stderr.strip().splitlines():
        match = NO_OWNER_PATTERN.match(line)
        assert match is not None, f"Invalid error line: {line!r}"
        record(match[1], None)

    # Every requested path must have been reported exactly once.
    assert results.keys() == paths
    return results
@dataclass
class IgnoreSet:
    """Result of classifying a set of paths, grouped by outcome."""

    # Paths that matched an ignore glob.
    ignored: set[Path] = field(default_factory=set)
    # Files that were neither ignored nor owned by a package.
    kept: set[Path] = field(default_factory=set)
    # Directories that were not ignored.
    kept_dirs: set[Path] = field(default_factory=set)
    # Files owned by a package, mapped to that package's name.
    pacman: dict[Path, str] = field(default_factory=dict)

    def print(self, level):
        """Write the paths selected by ``level`` to stdout, one per line.

        Levels:
            "ignored":  the ignored paths, sorted.
            "kept":     the kept files, sorted by their string form.
            "kept-any": kept files and kept directories together, sorted.
            "any":      every known path, sorted, followed by a label
                        (IGNORED, FILE, DIR, or PACMAN plus the package).

        Raises:
            click.ClickException: If ``level`` is none of the above.
        """
        if level == "ignored":
            for entry in sorted(self.ignored):
                print(entry)
            return

        if level in ("kept", "kept-any"):
            names = [str(entry) for entry in self.kept]
            if level == "kept-any":
                names += [str(entry) for entry in self.kept_dirs]
            for name in sorted(names):
                print(name)
            return

        if level == "any":
            # Later updates overwrite earlier ones for the same path, so the
            # order below (ignored, kept, kept_dirs, pacman) is significant.
            labels = {}
            labels.update((str(entry), ("IGNORED",)) for entry in self.ignored)
            labels.update((str(entry), ("FILE",)) for entry in self.kept)
            labels.update((str(entry), ("DIR",)) for entry in self.kept_dirs)
            labels.update(
                (str(entry), ("PACMAN", pkg)) for entry, pkg in self.pacman.items()
            )
            for name in sorted(labels):
                print(name, *labels[name])
            return

        raise click.ClickException(f"Invalid level: {level}")
def classify(targets, warn_unused=True) -> IgnoreSet:
    """Walk ``targets`` and sort every path found into an ``IgnoreSet``.

    Directories matching an ignore glob are recorded as ignored and not
    descended into; other directories are recorded as kept directories.
    Files matching an ignore glob are recorded as ignored; the remaining
    files are looked up with pacman in batches and recorded either as
    package-owned or as kept.

    Args:
        targets: Iterable of directory paths to walk.
        warn_unused: When true (the default), print a warning to stderr for
            every ignore glob that matched nothing during the walk.

    Returns:
        The populated ``IgnoreSet``.
    """
    ignored_count = {glob: 0 for glob in IGNORED}
    res = IgnoreSet()
    pacman_queue = []
    max_queue_length = 50

    def drain_queue():
        """Drain the queue by checking pacman owners

        This is batched to ensure reasonable speed"""
        if not pacman_queue:
            # Nothing is pending; avoid querying pacman with no targets,
            # which it rejects with an error.
            return
        # Basic sanity checks
        for p in pacman_queue:
            assert isinstance(p, Path), repr(p)
            assert p not in res.kept
            assert p not in res.ignored
            assert p not in res.pacman
        # Do the deed
        owners = check_pacman_owners(set(pacman_queue))
        # Make sure to preserve ordering
        for p in pacman_queue:
            pkg = owners[p]
            if pkg is None:
                res.kept.add(p)
            else:
                res.pacman[p] = pkg
        # Reset for further use
        pacman_queue.clear()

    for target in targets:
        print(f"Walking {target}", file=sys.stderr)
        for dirpath, subdirs, files in os.walk(target):
            surviving_subdirs = []
            for subdir_name in subdirs:
                subdir = Path(dirpath, subdir_name)
                if should_ignore(subdir, counts=ignored_count):
                    res.ignored.add(subdir)
                else:
                    res.kept_dirs.add(subdir)
                    surviving_subdirs.append(subdir_name)
            # Prune in place so os.walk does not descend into ignored dirs.
            subdirs[:] = surviving_subdirs
            for f in files:
                f = Path(dirpath, f)
                if should_ignore(f, counts=ignored_count):
                    res.ignored.add(f)
                else:
                    pacman_queue.append(f)
                    if len(pacman_queue) >= max_queue_length:
                        drain_queue()
    # Flush whatever is left over from the last partial batch.
    drain_queue()
    assert not pacman_queue

    if warn_unused:
        for glob, count in ignored_count.items():
            if count < 1:
                print(f"WARN: Unused glob: {glob!r}", file=sys.stderr)
    return res
# Command-line entry point: classify every path under the given targets and
# print the subset selected by --level.
@click.command()
@click.option('--level', required=True, help="The level of info to print")
@click.argument('targets', nargs=-1)
def printDesiredFiles(level: str, targets: list[str]):
    # NOTE: documented with comments rather than a docstring on purpose --
    # click would surface a docstring as this command's --help text.
    # At least one target is required; nargs=-1 alone would accept none.
    if not targets:
        raise click.ClickException("Must provide some targets to search")
    classify(targets).print(level)


# Run the command as soon as the module is executed.
printDesiredFiles()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment