Skip to content

Instantly share code, notes, and snippets.

@cspickert
Last active December 16, 2015 02:09
Show Gist options
  • Save cspickert/5360838 to your computer and use it in GitHub Desktop.
Save cspickert/5360838 to your computer and use it in GitHub Desktop.
Plot word frequency over time in a git repo's commit messages using matplotlib.
#!/usr/bin/python
"""
Plot the frequency of the given words in the commit messages of a git repository.
Usage: python word_frequency.py git_dir word1 [word2...]
Example: python word_frequency.py /path/to/repo/.git fix add remove
"""
import sys
import datetime
import pygit2
import matplotlib.pyplot as plot
def month_for_date(date):
    """Return the start of the month that contains ``date``.

    Args:
        date: An object exposing ``year`` and ``month`` attributes, such as
            ``datetime.date`` or ``datetime.datetime``.

    Returns:
        A naive ``datetime.datetime`` at midnight on day 1 of that month,
        usable as a monthly bucket key.
    """
    # Construct the value directly rather than formatting to a string and
    # parsing it back: the round trip is slower and can raise for early
    # years whose "%Y" rendering strptime will not accept.
    return datetime.datetime(date.year, date.month, 1)
# Require a repository path plus at least one word to track.
if len(sys.argv) < 3:
    # The parenthesized form is valid on both Python 2 and Python 3.
    print(__doc__)
    sys.exit(1)

repo = pygit2.Repository(sys.argv[1])
keywords = set(sys.argv[2:])

# Maps month (datetime) -> {keyword: occurrences in that month's messages}.
# Every month that has at least one commit gets an entry for every keyword,
# so all plotted series share the same x values.
monthly_counts = {}

# NOTE(review): newer pygit2 releases appear to expose the head commit id as
# `repo.head.target` instead of `.oid` -- confirm against the installed version.
for commit in repo.walk(repo.head.oid, pygit2.GIT_SORT_TIME):
    commit_date = datetime.date.fromtimestamp(commit.commit_time)
    month = month_for_date(commit_date)
    message = commit.message.lower()
    counts = monthly_counts.setdefault(month, dict.fromkeys(keywords, 0))
    for keyword in keywords:
        # The message is lowercased, so lowercase the word as well;
        # otherwise a word typed with capitals could never match.
        counts[keyword] += message.count(keyword.lower())

months = sorted(monthly_counts)

# Sort the words so legend order and line colours are stable between runs
# (set iteration order is not).
for keyword in sorted(keywords):
    totals = [monthly_counts[month][keyword] for month in months]
    plot.plot(months, totals, label=keyword)

plot.legend()
plot.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment