Skip to content

Instantly share code, notes, and snippets.

@martinmoene
Last active October 10, 2018 13:20
Show Gist options
  • Save martinmoene/5832fdbef4a0c6951c03ecec3765b58e to your computer and use it in GitHub Desktop.
Save martinmoene/5832fdbef4a0c6951c03ecec3765b58e to your computer and use it in GitHub Desktop.
Compare number of lines in different Subversion revisions of files
# file1.cpp has code removed unintentionally:
> svn-diff-size.py --threshold -60 --histogram --revision 4710:4839 Folder/*.cpp
Folder/file1.cpp: (4317 -> 4136): -181
Folder/file2.cpp: (2323 -> 2145): -178
Folder/file3.cpp: (1031 -> 886): -145
Histogram delta line-count:
-200: 0
-180: 2 **
-160: 0
-140: 1 *
-120: 0
-100: 0
-80: 0
-60: 0
-40: 3 ***
-20: 10 **********
+0: 32 ********************************
+20: 2 **
+40: 0
+60: 1 *
+80: 0
+100: 0
+120: 0
+140: 0
+160: 0
+180: 0
+200: 0
# file1.cpp after revert of unintentionally removed code:
> svn-diff-size.py --threshold -60 --histogram --revision 4710:HEAD Folder/*.cpp
Folder/file1.cpp: (4317 -> 4152): -165
Folder/file2.cpp: (2323 -> 2262): -61
Folder/file3.cpp: (1031 -> 886): -145
Histogram delta line-count:
-300: 0
-270: 0
-240: 0
-210: 0
-180: 1 *
-150: 1 *
-120: 0
-90: 0
-60: 2 **
-30: 4 ****
+0: 36 ************************************
+30: 3 ***
+60: 2 **
+90: 0
+120: 0
+150: 0
+180: 0
+210: 1 *
+240: 0
+270: 1 *
+300: 0
#!/usr/bin/env python
from __future__ import print_function
import argparse
import glob
import math
import os
import re
import subprocess
import sys
# Configuration:

# Line-count command; only svn_line_count2() pipes output through it.
cfg_wc = 'wc'

# Subversion command-line client used for 'svn cat' and 'svn diff'.
cfg_svn = 'svn'

# End configuration.
def x_label(scale, n):
    """Return the integer axis label for bin offset *n*.

    The label is *n* divided by *scale*, rounded to the nearest integer.
    """
    unscaled = n / scale
    return int(round(unscaled))
def x(bin_count, scale, n):
    """Return the bin index for line-count delta *n*.

    The centre bin (``bin_count // 2``) represents a delta of zero; *n* is
    scaled by *scale* and rounded to find its offset from that centre.

    Floor division keeps the result an integer on Python 3 as well as on
    Python 2, so it can be used directly as a list index.
    """
    centre = bin_count // 2
    return centre + int(round(scale * n))
def y(scale, n):
    """Return the bar length for a bin holding *n* files.

    The scaled value is rounded up, so any non-empty bin gets at least one
    star when *scale* is positive.
    """
    bar_length = math.ceil(n * scale)
    return int(bar_length)
def histogram(counts, opt):
    """Print a textual histogram of line-count deltas, one row per bin.

    counts -- dict mapping a line-count delta to the number of files that
              changed by that delta.
    opt    -- parsed options; nothing is printed unless ``opt.histogram``
              is true.

    The deltas are spread over 21 bins centred on zero. The axis range is
    the largest absolute delta rounded down to a multiple of 50, plus 50.
    Bars are scaled so that the fullest bin is at most about 50 stars wide.
    """
    if not opt.histogram:
        return
    if not counts:
        # No files were compared; max() below would raise on an empty dict.
        return

    y_resolution = 50
    x_resolution = 50
    bin_count = 21

    # Floor division: the range must be a whole multiple of x_resolution on
    # both Python 2 and Python 3.
    largest_delta = max(abs(delta) for delta in counts)
    max_count = x_resolution + largest_delta // x_resolution * x_resolution
    x_scale = 0.5 * (bin_count - 1) / max_count

    bins = [0] * bin_count
    for key, count in counts.items():
        # int() guards against a float index should x() return one.
        bins[int(x(bin_count, x_scale, key))] += count

    # Scale on the fullest *bin*: several deltas share one bin, so the
    # largest per-delta count can underestimate the longest bar.
    max_files = max(bins)
    y_scale = 1 if max_files <= y_resolution else float(y_resolution) / max_files

    print('\nHistogram delta line-count:')
    first_offset = -(bin_count - 1) // 2
    for offset, total in enumerate(bins, first_offset):
        print('{:+4}:{:3} {}'.format(x_label(x_scale, offset), total, y(y_scale, total) * '*'))
def svn_diff(path, rev1, rev2, opt):
    """Print the 'svn diff' output between two revisions of a file.

    Does nothing unless ``opt.svn_diff`` is true.
    """
    if opt.svn_diff:
        revision_range = '{}:{}'.format(rev1, rev2)
        process = subprocess.Popen(
            [cfg_svn, 'diff', '-r', revision_range, path],
            stdout=subprocess.PIPE,
            universal_newlines=True)
        diff_text = process.communicate()[0]
        print(diff_text)
def svn_line_count(path, rev, opt):
    """Return the line count for the given revision of a file.

    The file content is obtained with 'svn cat -r <rev> <path>' and the
    number of newline characters in its output is returned.
    """
    process = subprocess.Popen(
        [cfg_svn, 'cat', '-r', '{}'.format(rev), path],
        stdout=subprocess.PIPE,
        universal_newlines=True)
    content = process.communicate()[0]
    return content.count('\n')
def svn_line_count2(path, rev, opt):
    """Return the line count for the given revision of a file using 'wc -l'.

    Alternative to counting newlines in Python; not noticeably faster.
    The output of 'svn cat -r <rev> <path>' is piped straight into
    'wc -l' and the resulting number is returned as an int.

    Raises ValueError if 'wc' does not print a number.
    """
    svn_command = [cfg_svn, 'cat', '-r', '{}'.format(rev), path]
    wcl_command = [cfg_wc, '-l']

    svn_process = subprocess.Popen(
        svn_command, stdout=subprocess.PIPE, universal_newlines=True)
    wcl_process = subprocess.Popen(
        wcl_command, stdout=subprocess.PIPE, universal_newlines=True,
        stdin=svn_process.stdout)

    # Close the parent's copy of the pipe so that svn sees a broken pipe if
    # wc exits early, and so the descriptor is not leaked.
    svn_process.stdout.close()

    out, err = wcl_process.communicate()

    # Reap the svn child; otherwise it lingers as a zombie process.
    svn_process.wait()
    return int(out)
def split_revision(revision, opt):
    """Return a revision spec such as '123', '123:' or '123:456' as a tuple.

    The second element defaults to 'HEAD' when no end revision is given,
    including the '123:' form where the part after the colon is empty.
    A spec with more than one colon also falls back to (first part, 'HEAD').
    """
    revs = revision.split(':')
    start = revs[0]
    # An empty end revision ('123:') would otherwise be passed to svn as ''.
    end = revs[1] if len(revs) == 2 and revs[1] else 'HEAD'
    return (start, end)
def nochange_only(threshold):
    """Return True when the threshold string denotes zero.

    Raises ValueError if *threshold* is not an integer literal.
    """
    numeric = int(threshold)
    return numeric == 0
def additions_only(threshold):
    """Return True when the threshold string has an explicit '+' sign.

    An empty string yields False rather than raising IndexError.
    """
    return threshold.startswith('+')
def removals_only(threshold):
    """Return True when the threshold string has a leading '-' sign.

    An empty string yields False rather than raising IndexError.
    """
    return threshold.startswith('-')
def printSingleFile(path, rev1, rev2, lc_rev1, lc_rev2, opt):
    """Print the line counts of both revisions and the signed difference.

    Afterwards the diff between the revisions is handed to svn_diff(),
    which prints it only when that option is enabled.
    """
    delta = lc_rev2 - lc_rev1
    summary = '{path}: ({lc1} -> {lc2}): {diff:+d}'.format(
        path=path, lc1=lc_rev1, lc2=lc_rev2, diff=delta)
    print(summary)
    svn_diff(path, rev1, rev2, opt)
def compareRevisionsSingleFile(path, revision, opt):
    """Compare the line count of two revisions of a single file.

    Returns the line-count difference (second revision minus first).

    What gets reported depends on ``opt.threshold``:
      0   -- report files whose line count is unchanged;
      +n  -- report files that grew by more than n lines;
      -n  -- report files that shrank by more than n lines;
      n   -- report files that changed by more than n lines either way.
    """
    old_rev, new_rev = split_revision(revision, opt)
    old_lines = svn_line_count(path, old_rev, opt)
    new_lines = svn_line_count(path, new_rev, opt)
    delta = new_lines - old_lines

    if opt.verbose:
        print('{path}: rev {rev1}:{rev2} - lines: {lc1} -> {lc2} = {diff}'.format(
            path=path, rev1=old_rev, rev2=new_rev,
            lc1=old_lines, lc2=new_lines, diff=delta))

    limit = int(opt.threshold)

    # Threshold 0 is a mode of its own: only unchanged files are mentioned.
    if nochange_only(opt.threshold):
        if delta == 0:
            print('{path}: line count unchanged'.format(path=path))
        return delta

    if additions_only(opt.threshold):
        exceeded = delta > limit
    elif removals_only(opt.threshold):
        # limit is negative here, so this selects larger removals.
        exceeded = delta < limit
    else:
        exceeded = abs(delta) > limit

    if exceeded:
        printSingleFile(path, old_rev, new_rev, old_lines, new_lines, opt)
    return delta
def compareRevisionsOfFiles(paths, revision, opt):
    """Compare revisions for every file and tally the resulting deltas.

    Returns a dict mapping each line-count delta to the number of files
    that changed by exactly that delta.
    """
    tally = {}
    for file_path in paths:
        delta = compareRevisionsSingleFile(file_path, revision, opt)
        tally[delta] = tally.get(delta, 0) + 1
    return tally
def file_list(paths):
    """Return the files matched by the given wildcards and folders.

    A directory contributes the regular files directly inside it; any
    other entry is treated as a glob pattern, of which only the matches
    that are regular files are kept.
    """
    found = []
    for pattern in paths:
        if not os.path.isdir(pattern):
            found += filter(os.path.isfile, glob.glob(pattern))
            continue
        # Re-enter with '<dir>/*' so the directory's entries get globbed.
        found += file_list([os.path.join(pattern, '*')])
    return found
def main():
    """Parse the command line, compare the files and print the results.

    The positional paths are expanded to a file list, each file is compared
    between the requested revisions, and the tally of line-count deltas is
    passed to histogram(), which prints it when --histogram is given.
    """
    parser = argparse.ArgumentParser(
        description='Compare number of lines in different revisions of files.',
        epilog='',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    add = parser.add_argument
    add('-v', '--verbose', action='store_true',
        help='report the file being processed')
    add('--revision', metavar='rev', type=str, default='1:HEAD',
        help='revision(s), like 123[:HEAD] 123:456')
    add('--threshold', metavar='num', type=str, default='10',
        help='reporting threshold: 0, n, -n, +n')
    add('--histogram', action='store_true',
        help='print histogram of line count changes')
    add('--svn-diff', action='store_true',
        help='print svn diff output')
    add('paths', metavar='path', type=str, nargs='+',
        help='paths of directories and files to process')

    opt = parser.parse_args()

    files = file_list(opt.paths)
    counts = compareRevisionsOfFiles(files, opt.revision, opt)
    histogram(counts, opt)
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
# end of file
usage: svn-diff-size.py [-h] [-v] [--revision rev] [--threshold num] [--histogram] [--svn-diff] path [path ...]
Compare number of lines in different revisions of files.
positional arguments:
path paths of directories and files to process
optional arguments:
-h, --help show this help message and exit
-v, --verbose report the file being processed (default: False)
--revision rev revision(s), like 123[:HEAD] 123:456 (default: 1:HEAD)
--threshold num reporting threshold: 0, n, -n, +n (default: 10)
--histogram print histogram of line count changes (default: False)
--svn-diff print svn diff output (default: False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment