Skip to content

Instantly share code, notes, and snippets.

@simonw
Created August 3, 2017 23:43
Show Gist options
  • Star 20 You must be signed in to star a gist
  • Fork 6 You must be signed in to fork a gist
  • Save simonw/091b765a071d1558464371042db3b959 to your computer and use it in GitHub Desktop.
Save simonw/091b765a071d1558464371042db3b959 to your computer and use it in GitHub Desktop.
Python function for retrieving all git commits (from "git log" in the current directory) as a useful data structure
[
{
"date": "Mon Jan 23 13:03:00 2017 -0800",
"message": "Recommended by Django https://docs.djangoproject.com/en/1.10/ref/databases/#setting-sql-mode",
"title": "SET sql_mode='STRICT_TRANS_TABLES';",
"hash": "a395f401c9a6d3e8602b80ecc729d736da871f46",
"author": "Simon Willison <simonw@example.com>"
},
{
"date": "Mon Jan 23 11:37:38 2017 -0800",
"message": "Turns out django.utils.log.NullHandler was removed in Django 1.9\n\nhttp://stackoverflow.com/a/34349623",
"title": "Fixed logging NullHandler",
"hash": "0a1da86196cb72ead43cd8ac2d256a79d4f3bd4a",
"author": "Simon Willison <simonw@example.com>"
}
]
import subprocess
import re
# ``git log`` indents every commit-message line by exactly four spaces; this
# pattern removes that indent while leaving any deeper indentation intact.
leading_4_spaces = re.compile(r'^    ')


def _finalise_commit(commit):
    """Turn the collected message lines of *commit* into title and body.

    ``commit['message']`` is expected to be a list of message lines (it may
    be absent when the commit has no message). On return ``commit['title']``
    holds the first line and ``commit['message']`` holds the remaining lines
    joined with newlines; both are empty strings when there was no message.
    The same dict is returned for convenience.
    """
    message_lines = commit.get('message', [])
    commit['title'] = message_lines[0] if message_lines else ''
    body = message_lines[1:]
    # Drop the blank line that separates the title from the body.
    if body and body[0] == '':
        del body[0]
    commit['message'] = '\n'.join(body)
    return commit


def get_commits(log_output=None):
    """Return the commits listed by ``git log`` as a list of dicts.

    Args:
        log_output: Optional text (``str`` or UTF-8 ``bytes``) in the default
            ``git log`` format. When omitted, ``git log`` is run in the
            current working directory and its output is parsed.

    Returns:
        One dict per commit, in the order they appear in the log. Each dict
        has ``hash``, ``title`` and ``message`` keys, plus one lower-cased
        key per header line found (e.g. ``author``, ``date``, ``merge``).

    Raises:
        subprocess.CalledProcessError: if ``git log`` exits with a non-zero
            status (only when ``log_output`` is not supplied).
    """
    if log_output is None:
        log_output = subprocess.check_output(
            ['git', 'log'], stderr=subprocess.STDOUT
        )
    # check_output returns bytes on Python 3; work on text from here on.
    if isinstance(log_output, bytes):
        log_output = log_output.decode('utf-8', 'replace')

    commits = []
    current = None
    for line in log_output.split('\n'):
        if line.startswith('commit '):
            # A new commit header closes the one being collected.
            if current is not None:
                commits.append(_finalise_commit(current))
            current = {'hash': line[len('commit '):].strip()}
        elif current is None:
            # Ignore anything printed before the first commit header.
            continue
        elif line.startswith(' '):
            # Indented lines belong to the commit message.
            current.setdefault('message', []).append(
                leading_4_spaces.sub('', line)
            )
        else:
            # "Key: value" header lines; lines without a colon (such as the
            # empty separator lines) are skipped.
            key, separator, value = line.partition(':')
            if separator:
                current[key.lower()] = value.strip()
    if current is not None:
        commits.append(_finalise_commit(current))
    return commits
@simonw
Copy link
Author

simonw commented Aug 7, 2017

# Here's how I used this to find the longest commits in a repo.
# The print calls are written so they behave the same on Python 2 and 3.
commits = get_commits()
# Longest commit message first.
commits.sort(key=lambda c: len(c['message']), reverse=True)
for commit in commits[:30]:
    print(commit['title'])
    print(commit['author'])
    print(commit['hash'])
    print('https://github.com/simonw/repo_name/commit/' + commit['hash'])
    # Blank line between commits.
    print('')

@sanidhya-singh
Copy link

sanidhya-singh commented Jan 9, 2020

# Added decode to get this to work for me: check_output returns bytes here,
# so the output is decoded to text before being split into lines.
raw_log = subprocess.check_output(['git', 'log'], stderr=subprocess.STDOUT)
lines = raw_log.decode("utf-8").split("\n")

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment