Skip to content

Instantly share code, notes, and snippets.

@ssimono
Last active December 4, 2022 22:26
Embed
What would you like to do?
Partial file tracking script
from argparse import ArgumentParser, RawDescriptionHelpFormatter
from json import load, dump
from re import search
from subprocess import run, PIPE
from sys import exit
from textwrap import dedent
import hashlib
def _git_update_index(skip_worktree, file_path):
    """Set or clear git's skip-worktree flag for one path.

    Runs ``git update-index --skip-worktree <file_path>`` when
    ``skip_worktree`` is truthy and ``git update-index --no-skip-worktree
    <file_path>`` otherwise.

    git's stderr is captured and the result of the call is discarded, so a
    failing git command is neither printed nor raised.
    """
    if skip_worktree:
        prefix = ""
    else:
        prefix = "no-"
    command = ["git", "update-index", f"--{prefix}skip-worktree", file_path]
    run(command, stderr=PIPE)
def _compute_hash(file_path, patterns):
try:
with open(file_path, "r") as f:
sha1 = hashlib.new("sha1")
for line in f:
ignored = False
for p in patterns:
if search(p, line):
ignored = True
break
if not ignored:
sha1.update(line.encode())
return sha1.hexdigest()
except FileNotFoundError as e:
return ""
def process_tracked_files(cli, tracked_files):
    """Compare each tracked file with its stored fingerprint and update git.

    For every entry the current fingerprint is computed with
    ``_compute_hash``. When it differs from the entry's stored
    ``"fingerprint"`` the skip-worktree flag is cleared and the new value is
    recorded; when it matches and the file exists, the flag is set.

    Args:
        cli: Parsed command-line options; only ``cli.verbose`` is read.
        tracked_files: Iterable of dicts with a ``"path"`` key and optional
            ``"ignoredPatterns"`` and ``"fingerprint"`` keys.

    Returns:
        A dict mapping the path of every changed file to its new
        fingerprint. It is empty when nothing changed.
    """
    hash_updates = {}
    for tracked in tracked_files:
        file_path = tracked["path"]
        # An entry with no (or a null) pattern list fingerprints every line.
        patterns = tracked.get("ignoredPatterns") or []
        fingerprint = _compute_hash(file_path, patterns)

        if fingerprint != tracked.get("fingerprint", ""):
            if cli.verbose:
                print(
                    f"{file_path}: Detected change outside of ignored patterns. New fingerprint: {fingerprint}"
                )
            hash_updates[file_path] = fingerprint
            _git_update_index(False, file_path)
        elif fingerprint == "":
            # An empty fingerprint means the file does not exist, and it was
            # never fingerprinted either: there is no flag to set, so do not
            # claim otherwise in the verbose output.
            if cli.verbose:
                print(f"{file_path}: File not found. Nothing to do")
        else:
            if cli.verbose:
                print(
                    f"{file_path}: No interesting change detected. Setting skip-worktree flag"
                )
            _git_update_index(True, file_path)
    return hash_updates
def main(cli):
    """Run the fingerprint check for the index file named on the command line.

    Loads the JSON index, processes every entry under its ``"files"`` key and
    then:

    * returns normally when no fingerprint changed;
    * exits with status 1 when fingerprints changed and ``cli.write`` is
      not set;
    * otherwise rewrites the index file with the new fingerprints and exits
      with status 0.

    Args:
        cli: Parsed command-line options (``index_file``, ``write``,
            ``verbose``).
    """
    with open(cli.index_file, "r") as index_in:
        index_content = load(index_in)

    hash_updates = process_tracked_files(cli, index_content["files"])
    if not hash_updates:
        return
    if not cli.write:
        exit(1)

    if cli.verbose:
        print(f"Updating the fingerprints in {cli.index_file}")

    # Rebuild the entries, keeping the stored fingerprint for files that did
    # not change.
    refreshed = []
    for entry in index_content["files"]:
        previous = entry.get("fingerprint", "")
        refreshed.append(
            dict(entry, fingerprint=hash_updates.get(entry["path"], previous))
        )

    new_content = dict(index_content)
    new_content["files"] = refreshed
    with open(cli.index_file, "w") as index_out:
        dump(new_content, index_out, indent=2)
    exit(0)
if __name__ == "__main__":
parser = ArgumentParser(
prog="partial-track",
description=dedent(
"""
Reads a json index file containing a list of git-versioned files and ignored patterns.
For each of the tracked files, the script will:
1. compute the sha1 fingerprint of the tracked file content stripped out of any line matching one ignored pattern
2. if --write is passed, store the resulting fingerprint on the index file
3. if the fingerprint is identical as the value from the index file, set the --skip-worktree flag on the git index for the file.
4. if the fingerprint is different, unset the --skip-worktree, so the file appears as modified on the worktree.
Will exit with a non-zero code if any tracked file went through case 4.
"""
),
epilog=dedent(
"""
The format of the index file should be as follows:
{
"files": [
{
"path": "the-file-i-only-partially-care-about.gen.php",
"ignoredPatterns": [
"^Automatically generated on \d+",
"^bitcoin value at time of generation: \d+",
"CPU used for generation"
]
}
]
}
"""
),
formatter_class=RawDescriptionHelpFormatter,
)
parser.add_argument("index_file", help="index file")
parser.add_argument(
"-w",
"--write",
help="Save the new computed fingerprints into the index file.",
action="store_true",
)
parser.add_argument(
"-v",
"--verbose",
help="Output status for each tracked file",
action="store_true",
)
args = parser.parse_args()
main(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment