Skip to content

Instantly share code, notes, and snippets.

@AlexanderVR
Last active December 18, 2021 23:47
Show Gist options
  • Save AlexanderVR/84c89ecdb9d845600af96b6831d5c504 to your computer and use it in GitHub Desktop.
Save AlexanderVR/84c89ecdb9d845600af96b6831d5c504 to your computer and use it in GitHub Desktop.
compute diffs of pants targets with earlier revision
#!/usr/bin/env python
"""
Usage: `./diff_targets [<git ref>]` (the ref defaults to `HEAD` when omitted)
Generates a list of targets that have changed since the provided git ref.
Currently, `./pants list --changed-since=...` provides a significant overestimate of this.
For example, adding a comment to a BUILD file marks all targets defined there as being changed.
Targets can change in the following ways:
- A target references a file that has changed
- A build file has changed, where some of its target definitions have been modified
- ...any other ways?
Assumption:
- If a BUILD file changes, we should see all changes to targets reflected
in the metadata provided by `./pants peek`
Approach:
1) get the overestimate of changes to targets based on `--changed-since`.
2) use `git diff --name-only` to compute the set of changed files.
3) match these changed files with targets from `--changed-since` (this will ignore BUILD file changes)
4) use `git clone --shared` to quickly set up a temporary copy at the earlier ref.
5) diff the results of `./pants peek <(1)>` with the same command applied to the earlier git ref.
6) return the union of (3) and (5)
"""
import subprocess
import json
import typing as tp
import tempfile
def changed_targets_from_files(git_ref: str) -> tp.Set[str]:
    """Return the changed targets whose own file was modified since `git_ref`.

    Candidate targets come from `./pants filter --changed-since=<git_ref>`
    (run with the target-type filter below, which is meant to leave out
    requirement targets). Modified files come from
    `git diff --name-only <git_ref>`. A candidate is kept when the part of its
    address before the first `:` is one of the modified files, so targets
    listed only because their BUILD file was touched are not returned here.

    Neither command's exit status is checked; a command that prints nothing
    simply contributes no entries.

    Args:
        git_ref: Git revision passed to `--changed-since` and to `git diff`.

    Returns:
        The set of matching target addresses (empty when nothing matches).
    """

    def output_lines(cmd: tp.List[str]) -> tp.List[str]:
        # "".split("\n") is [""], not []; without the filter an empty command
        # output would later show up as a bogus empty-string target.
        stdout = subprocess.run(cmd, encoding="utf-8", capture_output=True).stdout
        return [line for line in stdout.strip().split("\n") if line]

    changed_since = output_lines(
        [
            "./pants",
            "filter",
            "--filter-target-type=-_python_requirements_file,python_requirement",
            f"--changed-since={git_ref}",
        ]
    )
    # Build the lookup set once instead of once per candidate target.
    changed_files = set(output_lines(["git", "diff", "--name-only", git_ref]))

    # assume target address is always `file` or `file:path_to_BUILD`
    return {
        address
        for address in changed_since
        if address.split(":", 1)[0] in changed_files
    }
def _peek_fingerprints(peek_stdout: str) -> tp.Dict[str, str]:
    """Map each target address in `./pants peek` JSON output to a canonical dump of its entry."""
    entries = json.loads(peek_stdout.strip() or "[]")
    # sort_keys makes the comparison independent of the key order pants emits.
    return {entry["address"]: json.dumps(entry, sort_keys=True) for entry in entries}


def changed_targets_from_builds(git_ref: str) -> tp.Set[str]:
    """Return the changed targets whose `./pants peek` metadata differs from `git_ref`.

    The current metadata of every target reported by
    `./pants peek --changed-since=<git_ref>` is compared with the metadata the
    same address has in a temporary `git clone --shared` checked out at
    `git_ref`. A target is returned when its metadata differs, or when the
    address does not exist at the old revision.

    Args:
        git_ref: Git revision to compare against.

    Returns:
        The set of target addresses that are new or whose metadata changed.

    Raises:
        subprocess.CalledProcessError: if resolving `git_ref`, cloning, or
            checking out the old revision fails. Failures of `./pants` itself
            are not checked.
    """
    current = _peek_fingerprints(
        subprocess.run(
            ["./pants", "peek", f"--changed-since={git_ref}"],
            encoding="utf-8",
            capture_output=True,
        ).stdout
    )
    if not current:
        # Nothing to compare: skip the clone and the extra pants runs.
        return set()

    # Resolve the ref in *this* repository. Inside the clone the same name can
    # mean something else (e.g. `origin/main` there is this repo's local
    # `main`) or not exist at all, whereas the commit id is always reachable
    # through the shared object store.
    commit = subprocess.run(
        ["git", "rev-parse", "--verify", f"{git_ref}^{{commit}}"],
        encoding="utf-8",
        capture_output=True,
        check=True,
    ).stdout.strip()

    previous: tp.Dict[str, str] = {}
    with tempfile.TemporaryDirectory() as tmpdir:
        # check=True: if either step fails the clone would not be at the old
        # revision and every comparison below would be meaningless.
        subprocess.run(["git", "clone", "-q", "--shared", ".", tmpdir], check=True)
        subprocess.run(["git", "checkout", "-q", commit], cwd=tmpdir, check=True)

        old_listing = subprocess.run(
            ["./pants", "list", "::"],
            cwd=tmpdir,
            encoding="utf-8",
            capture_output=True,
        ).stdout
        # Only peek at addresses that exist at the old revision; sorted so the
        # command line is deterministic.
        surviving = sorted(current.keys() & set(old_listing.strip().split("\n")))
        if surviving:
            previous = _peek_fingerprints(
                subprocess.run(
                    ["./pants", "peek", *surviving],
                    cwd=tmpdir,
                    encoding="utf-8",
                    capture_output=True,
                ).stdout
            )

    return {
        address
        for address, fingerprint in current.items()
        if previous.get(address) != fingerprint
    }
def diff_targets(git_ref: str):
    """Return a sorted list of every target either detection strategy reports for `git_ref`.

    The result is the union of the file-based and the BUILD-metadata-based
    change sets.
    """
    from_files = changed_targets_from_files(git_ref)
    from_builds = changed_targets_from_builds(git_ref)
    return sorted(from_files.union(from_builds))
if __name__ == "__main__":
    import sys

    # The first CLI argument is the git ref to compare against; fall back to
    # HEAD when none is given.
    cli_args = sys.argv[1:]
    git_ref = cli_args[0] if cli_args else "HEAD"

    # One target address per line.
    for target in diff_targets(git_ref):
        print(target)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment