Skip to content

Instantly share code, notes, and snippets.

@Neo-X
Forked from vitchyr/git_info.py
Created May 14, 2021 23:39
Show Gist options
  • Save Neo-X/5c2d2ceefdab3b2ac7c4246d13c6b6ba to your computer and use it in GitHub Desktop.
Save Neo-X/5c2d2ceefdab3b2ac7c4246d13c6b6ba to your computer and use it in GitHub Desktop.
Save Git Info in Python for Reproducible Experiments
"""
Helper functions to save git information every time you run an experiment.
Requirements:
- GitPython==2.1.12
(Probably works on other GitPython versions, but this is the version I've tested.)
Usage:
```
from git_info import save_git_info
def experiment(*args, log_dir, **kwargs):
    save_git_info(
        [
            '/home/user/code_dir1',
            '/home/user/code_dir2',
        ],
        log_dir,
    )
    # rest of experiment code
```
This will save a bunch of git repository information to `log_dir` so that your
experiment is reproducible. In particular, for each code directory listed, it
will save:
- the path to the code directory
- the git branch and hash
- any unstaged and staged diff in the repo
Ideally this information will be enough to reproduce any results.
"""
import json
import os
import os.path as osp
from typing import NamedTuple, List, Union
# Immutable record describing the git state captured for one code directory.
class GitInfo(NamedTuple):
    directory: str  # path of the code directory, as passed in by the caller
    code_diff: str  # output of `git diff` (unstaged changes)
    code_diff_staged: str  # output of `git diff --staged`
    commit_hash: str  # hex SHA of the HEAD commit
    branch_name: str  # active branch name, or a placeholder string
def _generate_git_infos(code_dirs):
    """Collect a ``GitInfo`` record for every directory in ``code_dirs``.

    Args:
        code_dirs: Iterable of paths of the code directories to inspect.

    Returns:
        A list with one ``GitInfo`` per directory, in input order. A
        directory that does not exist or is not a git repository gets a
        placeholder record with empty diffs and hash and
        ``branch_name='(not a git repo)'``. If GitPython cannot be imported,
        a hint is printed and an empty list is returned.
    """
    # Guard only the import. Catching NameError/UnboundLocalError around the
    # whole loop would report genuine bugs as "GitPython is missing".
    try:
        import git
    except ImportError:
        print("Install GitPython to automatically save git information.")
        return []

    git_infos = []
    for directory in code_dirs:
        try:
            repo = git.Repo(directory)
        except (git.exc.InvalidGitRepositoryError, git.exc.NoSuchPathError):
            # Best effort: still record the directory so the log shows that
            # no git information was available for it.
            git_infos.append(GitInfo(
                directory=directory,
                code_diff='',
                code_diff_staged='',
                commit_hash='',
                branch_name='(not a git repo)',
            ))
            continue

        try:
            branch_name = repo.active_branch.name
        except TypeError:
            # GitPython raises TypeError when HEAD is detached.
            branch_name = '[DETACHED]'

        git_infos.append(GitInfo(
            directory=directory,
            code_diff=repo.git.diff(None),
            code_diff_staged=repo.git.diff('--staged'),
            commit_hash=repo.head.commit.hexsha,
            branch_name=branch_name,
        ))
    return git_infos
def save_git_info(code_dirs, log_dir: str):
    """Write the git state of each directory in ``code_dirs`` into ``log_dir``.

    For every record produced by ``_generate_git_infos`` this writes:
    - ``<name>.patch`` containing the unstaged diff, if it is non-empty
    - ``<name>_staged.patch`` containing the staged diff, if it is non-empty
    - an entry (directory, hash, branch name) appended to ``git_infos.txt``

    ``<name>`` is the directory path with leading/trailing slashes removed
    and the remaining slashes replaced by dashes.

    Args:
        code_dirs: Paths of the code directories to record.
        log_dir: Directory that receives the files. It is created if it does
            not exist and there is something to write.

    Returns:
        The list of ``GitInfo`` records that were written. It is empty when
        no git information could be gathered.
    """
    git_infos = _generate_git_infos(code_dirs)
    if git_infos:
        os.makedirs(log_dir, exist_ok=True)

    for (
        directory, code_diff, code_diff_staged, commit_hash, branch_name
    ) in git_infos:
        # strip('/') rather than slicing off the first character: slicing
        # corrupts relative paths ("code/dir" -> "ode-dir") and fails on an
        # empty string.
        stem = directory.strip('/').replace('/', '-')
        patches = (
            (stem + ".patch", code_diff),
            (stem + "_staged.patch", code_diff_staged),
        )
        for file_name, diff in patches:
            if diff:
                # Explicit UTF-8 so diffs with non-ASCII text do not depend
                # on the platform's default encoding.
                patch_path = osp.join(log_dir, file_name)
                with open(patch_path, "w", encoding="utf-8") as f:
                    f.write(diff + '\n')

        # Append mode: one entry per directory accumulates in the same file.
        summary_path = osp.join(log_dir, "git_infos.txt")
        with open(summary_path, "a", encoding="utf-8") as f:
            f.write("directory: {}".format(directory))
            f.write('\n')
            f.write("git hash: {}".format(commit_hash))
            f.write('\n')
            f.write("git branch name: {}".format(branch_name))
            f.write('\n\n')
    return git_infos
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment