i am trying to create a prepare-commit-msg
hook that will automatically populate and format my commit messages into an org document that has a level 1 heading for each staged file in the commit. so for example, this is one i did by hand, but basically like this: https://github.com/samdmarshall/dotfiles/commit/6016f773b6e596defe810bded704372672fdfb28 where each staged file has it’s own heading and is tagged according to the status of that staged file. so when i’m finished in my commit editor, the contents of my .git/COMMIT_EDITMSG
looks like this:
#+title: git@github.com/somebody/somerepo.git @ develop
#+author: <git.config["user.name"]>
#+email: <git.config["user.email"]>
#+date: <timestamp of running =git commit=>
cutting a new release
#
# Description
#
* changelog :modified:
- bumping version number and cutting latest release
* src/version.c :modified:
- change in =PATCH_VERSION= definition
from here, i’m going to use another hook to make a copy of this file for my own uses, but the issue centers on interfacing with libgit2 via the pygit2 python package via python3 and not being able to get this list of staged files and their current status (the output of git diff --name-status --cached
). i’d even be content with just the list of currently staged files (the output of git diff --name-only --cached
), as i know how to look up the status of any file by it’s path from the index/workdir. however after spending yesterday pouring over the code in diff.c
and diff_stat.c
in the libgit2 source code, i have no idea how to invoke either of those two commands via the functionality provided by this wrapper. it may be that the python wrapper doesn’t provide the functionality i’m looking for, but it’d surprise me since it seems to be the officially sanctioned version of the libgit2 interface for python.
This is the code i’ve written so far for doing this. admittedly i could probably do all of this in a bash or another shell script with less overhead complexity (no calling into libgit2 directly and such) but would require a lot more ambiguous string parsing and is more apt to break in unexpected ways than interfacing with the apis properly so i chose python as a best of both worlds.
#!/usr/bin/env python3
# file: .git/hooks/prepare-commit-msg
# =======
# Imports
# =======
import os
import sys
from datetime import date
from pathlib import Path
from subprocess import check_output
import pygit2
# =========
# Functions
# =========
def find_root_git_repo(file: Path) -> Path:
result = file.resolve()
parents = [p for p in result.parents]
for parent in reversed(parents):
git_repo = parent.joinpath(".git/")
if git_repo.exists() and git_repo.is_dir():
result = parent
break
return result
def resolve_tag(tag_identifier: str) -> str:
lookup = {
'A': ":added:",
'M': ":modified:",
'D': ":deleted:",
'R': ":renamed:",
'C': ":copied:",
'I': ":ignored:",
'?': ":untracked:",
'T': ":typechange:",
'X': ":unreadable:",
' ': "",
}
return lookup[tag_identifier]
# ===========
# Entry Point
# ===========
exit_code = 0
arguments_count = len(sys.argv)
# Hook Arguments
commit_msg_file = Path(".git/COMMIT_EDITMSG")
commit_source = ""
commit_sha = ""
if arguments_count == 1:
exit_code = 1
else:
if arguments_count >= 3:
commit_msg_file = Path(sys.argv[1])
commit_source = str(sys.argv[2])
if arguments_count == 4:
commit_sha = str(sys.argv[3])
else:
exit_code = 1
# this is the path we are in
repo_path = find_root_git_repo(Path.cwd())
# ensure
if exit_code == 0:
repo = pygit2.Repository(repo_path) # GIT_BRANCH_LOCAL
commit = None
head = None # repo.revparse_single("HEAD")
if commit_source == "template":
# Using `-t|--template` or the config option `commit.template` exists
print("Filling file from template...")
if commit_source == "message":
# Using `-m|--message` or `-F|--file`
print("Using specified message or file...")
if commit_source == "merge":
# If generating merge commit, or `.git/MERGE_MSG` exists
print("Generating merge commit...")
if commit_source == "squash":
# If generating squash commit, or `.git/SQUASH_MSG` exists
print("Generating squash commit...")
if commit_source == "commit":
# Using `-c|--reedit-message`, `-C|--reuse-message`, or `--amend`
print("Modifying existing commit message...")
commit = repo.revparse_single(commit_sha)
if commit_source == "":
# No message or template specified, this is a regular commit
print("Generating commit message...")
# this is the problem i'm having, the results returned by this diff aren't what i expect
diff = repo.diff(cached=True)
print(diff.patch) # <- this returns "None" instead of the expected patch for an empty file
# i could use this method to iterate over the entire repo contents and add what is staged,
# but that seems wrong and very costly for a hook to do, i'm not opposed to using this to
# look up status of a subset of files, but i need to know which files those are first.
status = repo.status()
# this code is used right now, to work around the problem
diff_output = check_output(["git", "--no-pager", "diff", "--cached", "--name-status"]).decode("utf-8").strip()
commit_diff = dict([diff_line.split('\t')[::-1] for diff_line in diff_output.splitlines()])
# open the existing commit message file to add my formatting
fd = open(commit_msg_file, 'a')
with fd:
active_branch = [branch for branch in list(repo.branches) if repo.branches[branch].is_head()]
#commit_author_date = os.environ.get("GIT_AUTHOR_DATE", "")
#commit_creator_date = os.environ.get("GIT_COMMITTER_DATE", "")
today = date.today()
timestamp = today.format("<%Y-%m-%d %a %H:%M>")
fd.write("#+title: {} @ {}\n".format( repo.workdir, any(active_branch) ))
fd.write("#+author: {}\n".format( repo.config["user.name"] ))
fd.write("#+email: {}\n".format( repo.config["user.email"] ))
fd.write("#+date: {}\n".format( timestamp ))
fd.write("#+startup: indent showeverything\n")
fd.write("\n")
for file in commit_diff.keys():
fd.write("* {} {} \n".format(file, resolve_tag(commit_diff[file])))
fd.write(" - \n")
fd.close()
sys.exit(exit_code)