Skip to content

Instantly share code, notes, and snippets.

@ruxkor
Created February 1, 2012 01:06
Show Gist options
  • Save ruxkor/1714314 to your computer and use it in GitHub Desktop.
Save ruxkor/1714314 to your computer and use it in GitHub Desktop.
get the most changed files of a git repository by analyzing its log
#!/usr/bin/env python
# Usage:
# pipe in, or pass as a file argument, the output of the
# following command:
#
# git log --pretty="format:COMMIT %h %at %f" --numstat
#
#
#
# Copyright 2012 Andor Goetzendorff
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
# of the Software, and to permit persons to whom the Software is furnished to do
# so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# --- Tunable settings --------------------------------------------------------
# Divisor applied to the running weight factor at every commit header.
smooth_factor = 1.005
# Number of decimals the running weight factor is rounded to.
rounding_factor = 5
# Processing stops once the running weight factor falls below this value.
pct_threshold = 0.05
# Processing stops once this many commit headers have been read.
commit_threshold = 800
# Controls how many of the ranked files are printed.
file_limit = 50


def is_relevant_file(filename):
    """Tell whether a path should show up in the report.

    A path qualifies when it contains '.py' and does not contain '/tests/'.
    """
    if '/tests/' in filename:
        return False
    return '.py' in filename
# do not modify from here
import fileinput


def collect_stats(lines, smooth, digits, min_pct, max_commits):
    """Accumulate weighted per-file change statistics from ``git log`` output.

    ``lines`` is any iterable of text lines shaped like the output of
    ``git log --pretty="format:COMMIT %h %at %f" --numstat``: one header line
    starting with ``COMMIT`` per commit, followed by tab-separated
    ``added<TAB>removed<TAB>path`` lines. Blank lines are ignored.

    Every commit header divides the running weight factor by ``smooth`` and
    rounds it to ``digits`` decimals. Each counted value of that commit is
    divided by the factor before being summed. Reading stops at the first
    header whose factor is below ``min_pct`` or whose ordinal has reached
    ``max_commits``; the files of that commit are not counted.

    Returns a dict mapping each path to a dict with the keys ``'added'``,
    ``'removed'``, ``'changed'`` and ``'times'``.

    Raises ValueError when a non-empty, non-header line does not consist of
    exactly three tab-separated fields, or when its counts are not integers.
    """
    # NOTE(review): with smooth > 1 the factor shrinks from header to header,
    # and dividing by it makes later input lines weigh more. With git's
    # default newest-first order this favours older commits -- confirm that
    # this is the intended weighting.
    factor = smooth
    commits_seen = 0
    stats = {}
    for raw in lines:
        line = raw.strip()
        if line.startswith('COMMIT'):
            commits_seen += 1
            factor = round(factor / smooth, digits)
            if factor < min_pct or commits_seen >= max_commits:
                break
            continue
        if not line:
            continue
        added, removed, path = line.split('\t')
        if added == '-':
            # numstat prints "-" instead of line counts for binary files.
            continue
        added = int(added)
        removed = int(removed)
        entry = stats.setdefault(
            path, {'added': 0, 'removed': 0, 'changed': 0, 'times': 0})
        entry['added'] += round(added / factor, 4)
        entry['removed'] += round(removed / factor, 4)
        entry['changed'] += round((added / factor) + (removed / factor), 4)
        entry['times'] += round(1 / factor, 4)
    return stats


def rank_files(stats, keep, limit):
    """Return at most ``limit`` ``(path, times, changed)`` rows, highest first.

    ``stats`` is the mapping produced by ``collect_stats``. Only paths for
    which ``keep(path)`` is true are considered. ``times`` and ``changed`` are
    rounded to integers, and rows are ordered by ``(times, changed)``
    descending.
    """
    rows = [
        (path, int(round(vals['times'])), int(round(vals['changed'])))
        # .items() instead of .iteritems() so this runs on Python 2 and 3.
        for path, vals in stats.items()
        if keep(path)
    ]
    rows.sort(key=lambda row: row[1:], reverse=True)
    # Slice to exactly ``limit`` rows; the earlier ``limit + 1`` slice
    # printed one row more than the configured limit.
    return rows[:limit]


def main():
    """Read the git log from the named files or stdin and print the ranking.

    Input comes from ``fileinput.input()``; the settings are the module-level
    configuration values defined above. One ``path<TAB>times<TAB>changed``
    line is printed per ranked file.
    """
    stats = collect_stats(
        fileinput.input(),
        smooth_factor,
        rounding_factor,
        pct_threshold,
        commit_threshold,
    )
    for row in rank_files(stats, is_relevant_file, file_limit):
        # A single parenthesised argument prints identically on Python 2
        # (print statement) and Python 3 (print function).
        print('%s\t%d\t%d' % row)


# Guarded so that importing the module does not start reading input.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment