Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Show large files in Git repository
#!/usr/bin/env python
#
# Print list of large files in Git
# Copyright (c) 2015 Adam Strzelecki
#
# Installation:
# Place it somewhere in your PATH as an executable named `git-large-files`,
# then run it as `git large-files`.
#
import os, sys, re
def usage():
    """Print the command-line synopsis and two example invocations to stdout."""
    # A single parenthesised argument keeps this valid on both Python 2
    # (print statement) and Python 3 (print function). The text ends with
    # '\n' and print adds one more, so the output ends with a blank line.
    print(
        'usage: git large-files size<k|m> [rev-range]\n'
        'examples:\n'
        'git large-files 1m\n'
        'git large-files 10m master^^^..\n'
    )
def _git_output(args):
    """Run ``git`` with the argument list *args* and return its stdout as text.

    The command is executed without a shell, so characters in the arguments
    are never interpreted as shell syntax. The exit status is not checked:
    on failure git reports the problem on stderr and whatever was written to
    stdout (usually nothing) is returned.
    """
    # Local import: subprocess is not among the file's top-level imports.
    import subprocess

    proc = subprocess.Popen(['git'] + list(args), stdout=subprocess.PIPE,
                            universal_newlines=True)
    out, _ = proc.communicate()  # also waits for git and closes the pipe
    return out


def large_files(min_size, rev_range='HEAD'):
    """Print every blob larger than *min_size* bytes reachable from *rev_range*.

    Each revision reported by ``git rev-list`` is listed recursively with
    ``git ls-tree -zrl``. Every blob whose size is strictly greater than
    *min_size* is recorded once per distinct (size, object id, path), and the
    records are printed largest first as ``<size> <object id> <path>``.

    *rev_range* is split on whitespace, so several revision arguments may be
    given in one string (e.g. ``'a..b c..d'``).
    """
    big_files = set()
    for revision in _git_output(['rev-list'] + rev_range.split()).split():
        for entry in _git_output(['ls-tree', '-zrl', revision]).split('\0'):
            if not entry:
                continue
            # Record layout: "<mode> <type> <object> <size>\t<path>".
            # Splitting on the TAB keeps leading whitespace in the path.
            meta, _, path = entry.partition('\t')
            mode, objtype, sha, size = meta.split()
            if objtype != 'blob':
                # Submodule (commit) entries report '-' as their size.
                continue
            size = int(size)
            if size > min_size:
                big_files.add((size, sha, path))
    for size, sha, path in sorted(big_files, reverse=True):
        print('%-8s %-40s %s' % (sizeof_fmt(size), sha, path))
from math import log
def sizeof_fmt(num):
    """Return the byte count *num* as a short human-readable string.

    Values of 0 and 1 are rendered as ``'0 b'`` and ``'1 b'``. Larger values
    are scaled by the largest power of 1024 not exceeding them (capped at PB)
    and printed with a per-unit number of decimals, e.g. ``'2 bytes'``,
    ``'1 kB'``, ``'5.5 MB'``, ``'1.00 GB'``. Negative values are formatted
    from their magnitude with a leading ``'-'``.
    """
    # (unit label, digits after the decimal point), indexed by power of 1024.
    # A tuple rather than zip(): on Python 3 zip() is an iterator and supports
    # neither len() nor indexing.
    unit_list = (
        ('bytes', 0), ('kB', 0), ('MB', 1), ('GB', 2), ('TB', 2), ('PB', 2),
    )
    if num < 0:
        return '-' + sizeof_fmt(-num)
    if num <= 1:
        return '%d b' % num
    # floor(log1024(num)) via integer arithmetic: bit_length() - 1 is
    # floor(log2(num)) and each power of 1024 is 10 bits. This avoids the
    # floating-point rounding of math.log at exact powers of 1024.
    exponent = min((int(num).bit_length() - 1) // 10, len(unit_list) - 1)
    quotient = float(num) / 1024 ** exponent
    unit, num_decimals = unit_list[exponent]
    return '%.*f %s' % (num_decimals, quotient, unit)
def parse_size(str):
    """Parse a size such as ``'512'``, ``'10k'`` or ``'2M'`` into bytes.

    The input is a decimal integer optionally followed by ``k`` (x1024) or
    ``m`` (x1024*1024), case-insensitively, and optionally by a trailing
    ``b`` (so ``'10mb'`` equals ``'10m'``). Returns the size as an int, or
    None when the whole string does not match that form.

    The parameter name shadows the ``str`` builtin; it is kept so the
    signature stays unchanged for existing callers.
    """
    # Anchored at the end (\Z) so unknown suffixes such as '1g' or '1.5m' are
    # rejected instead of being read as the leading digits alone.
    match = re.match(r'(\d+)([km])?b?\Z', str.lower())
    if not match:
        return None
    unit_values = {None: 1, 'k': 1024, 'm': 1024 * 1024}
    return int(match.group(1)) * unit_values[match.group(2)]
# Command-line entry: expects a size argument and an optional revision range.
# Any other argument count, or a size that fails to parse (or parses to 0),
# prints the usage text instead.
args = sys.argv[1:]
size = parse_size(args[0].lower()) if 1 <= len(args) <= 2 else None
if size:
    # args[1:] is empty or holds the single rev-range argument.
    large_files(size, *args[1:])
else:
    usage()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.