Created
June 13, 2021 16:24
-
-
Save asford/4966a816d40d5e0579edbf4e0b9e410f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import io\n", | |
"import logging\n", | |
"import subprocess\n", | |
"from contextlib import suppress\n", | |
"from typing import Iterable, Tuple" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def _get_revision_tree_ids(commit_shas: Iterable[str]) -> Iterable[Tuple[str, str]]:\n", | |
" \"\"\"Given a list of commit shas, read tree ids via cat-file.\n", | |
"\n", | |
" Yields (commit_sha, tree_sha) tuples for all commits successfully resolved to a tree object.\n", | |
" \"\"\"\n", | |
"\n", | |
" # cat-file --batch prints object blobs in a record-oriented streaming format\n", | |
" # See:\n", | |
" # https://git-scm.com/docs/git-cat-file#_batch_output\n", | |
" cat_result = subprocess.check_output(\n", | |
" [\"git\", \"cat-file\", \"--batch\"], input=\"\\n\".join(commit_shas).encode()\n", | |
" )\n", | |
"\n", | |
" reader = io.BytesIO(cat_result)\n", | |
" \n", | |
" # Stream through the cat-file results, yielding (commit_sha, tree_sha)\n", | |
" # when a valid commit object is found.\n", | |
" #\n", | |
" # In the happy path cat-file should emit a record for every sha;\n", | |
" # however, there may be missing objects/symlinks/etc. in more esoteric\n", | |
" # repository states.\n", | |
" # Just ignore these records.\n", | |
" \n", | |
" object_line = reader.readline().strip()\n", | |
" while object_line:\n", | |
" # We want objects of the form\n", | |
" # '<hash> commit <content_size>\\n<content>\\n'\n", | |
" object_info = object_line.split()\n", | |
"\n", | |
" # But there are alternative forms if the commit is missing, lives in a symlink, etc.\n", | |
" #\n", | |
" # However, if the read has any content the last entry will be an integer content size.\n", | |
" # First read any content that's present off the input stream.\n", | |
" #\n", | |
" # object_info may be a three tuple or two tuple. Eg:\n", | |
" # <object> <type> <size>\\n<content>\\n\n", | |
" # symlink <size>\\n<content>\\n\n", | |
" #\n", | |
" # If the read has no content it will be a two-tuple. Eg:\n", | |
" # <object> missing\\n\n", | |
" # <object> ambiguous\\n\n", | |
" # \n", | |
" # Just try to parse for the size...\n", | |
" size = None\n", | |
" content = None\n", | |
" with suppress(ValueError):\n", | |
" size = int(object_info[-1])\n", | |
"\n", | |
" if size is not None:\n", | |
" # Trailing newline on content\n", | |
" content = reader.read(size + 1)\n", | |
" assert content[-1:] == b\"\\n\"\n", | |
" content = content[:-1]\n", | |
"\n", | |
" # Now we've read through the object in the content stream and\n", | |
" # can safely continue if we don't find an object of interest\n", | |
" if object_info[1] != b\"commit\":\n", | |
" logging.warning(\"non-commit object reading tree hashes: %s\", object_info)\n", | |
" continue\n", | |
"\n", | |
" # commit objects are line-oriented records themselves,\n", | |
" # but always start with the tree reference of the form:\n", | |
" #\n", | |
" # tree <tree_sha>\\n\n", | |
" #\n", | |
" # https://git-scm.com/book/en/v2/Git-Internals-Git-Objects\n", | |
" if not content.startswith(b\"tree\"):\n", | |
" logging.warning(\"commit object didn't start with tree entry: %s\", object_info)\n", | |
"\n", | |
" tree, tree_sha, rest = content.split(maxsplit=2)\n", | |
"\n", | |
" yield (object_info[0].decode(), tree_sha.decode())\n", | |
"\n", | |
" # Read the next object record\n", | |
" object_line = reader.readline().strip()\n", | |
"\n", | |
"\n", | |
"def _get_matching_tree_commits_via_catfile(commit_sha: str, candidate_shas: str):\n", | |
" \"\"\"Identify all commits with tree contents matching commit_sha\"\"\"\n", | |
" ((_commit_rev, commit_tree),) = revision_tree_ids([commit_sha])\n", | |
"\n", | |
" matches = []\n", | |
"\n", | |
" for other_rev, other_tree in revision_tree_ids(candidate_shas):\n", | |
" if other_tree == commit_tree:\n", | |
" matches.append(other_rev)\n", | |
"\n", | |
" return matches\n", | |
"\n", | |
"\n", | |
"def matching_tree_commits_via_difftree(commit_sha: str, candidate_shas: str):\n", | |
" matches = []\n", | |
" for candidate_sha in candidate_shas:\n", | |
" check = subprocess.run(\n", | |
" [\"git\", \"diff-tree\", \"--quiet\", commit_sha, candidate_sha]\n", | |
" )\n", | |
"\n", | |
" if check.returncode == 0:\n", | |
" matches.append(candidate_sha)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"100" | |
] | |
}, | |
"execution_count": 27, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"rev_list= !git rev-list HEAD ^HEAD~100\n", | |
"len(rev_list)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"16.2 ms ± 530 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"matching_tree_commits_via_catfile(rev_list[0], rev_list)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"858 ms ± 38.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"matching_tree_commits_via_difftree(rev_list[0], rev_list)" | |
] | |
} | |
], | |
"metadata": { | |
"jupytext": { | |
"text_representation": { | |
"extension": ".py", | |
"format_name": "percent", | |
"format_version": "1.3", | |
"jupytext_version": "1.11.3" | |
} | |
}, | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment