Skip to content

Instantly share code, notes, and snippets.

@afeblot
Last active February 8, 2022 14:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save afeblot/09418e0d6efbe6a0e451dc26c630acc2 to your computer and use it in GitHub Desktop.
Save afeblot/09418e0d6efbe6a0e451dc26c630acc2 to your computer and use it in GitHub Desktop.
In a git repo, lists all files larger than a given size and shows which commits they appear in and which branches contain those commits.
#! /usr/bin/env python3
# Lists the blobs of the current git repository whose size exceeds sizeLimit,
# together with the commits that reference each blob and the remote branches
# containing those commits.

# Size threshold: only blobs strictly larger than this are reported (500 KiB).
sizeLimit = 500*1024 # bytes
import subprocess
import sys
def execute(shellCmd):
    """Run *shellCmd* through the shell and return its standard output as text.

    The command is passed to the shell as-is (pipelines are supported), so it
    must only be built from trusted values.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status.
    """
    # Git output may contain bytes that are not valid UTF-8 (e.g. file names
    # stored in a legacy encoding). Replace them instead of letting a single
    # odd path abort the whole run with a UnicodeDecodeError.
    return subprocess.check_output(
        shellCmd,
        shell=True,
        encoding='utf8',
        errors='replace',
    )
def getLargeFiles():
    """Return the blobs reachable from any ref that are larger than sizeLimit.

    Returns:
        A list of dicts with the keys 'blobId' (object name), 'size' (bytes,
        as int) and 'path' (path reported by ``git rev-list --objects``), in
        the order produced by the pipeline (sorted by ascending size).
    """
    listBlobsCmd = (
        'git rev-list --objects --all'
        ' | git cat-file --batch-check="%(objecttype) %(objectname) %(objectsize) %(rest)"'
        ' | sed -n "s/^blob //p"'
        ' | sort --numeric-sort --key=2'
    )
    largeFiles = []
    for line in execute(listBlobsCmd).split('\n'):
        # An empty repository (or the trailing newline) yields empty lines.
        if not line:
            continue
        # Split at most twice: the path is the remainder of the line and may
        # itself contain spaces.
        tokens = line.split(' ', 2)
        if len(tokens) < 2:
            continue
        size = int(tokens[1])
        if size > sizeLimit:
            largeFiles.append({
                'blobId': tokens[0],
                'size': size,
                # A blob listed without a path gets an empty one.
                'path': tokens[2] if len(tokens) > 2 else '',
            })
    return largeFiles
def getCommits(blobId):
    """Return the commits, across all refs, that add or remove blob *blobId*.

    Returns:
        A list of dicts with the keys 'sha' (abbreviated hash), 'date'
        (author date, formatted ``YYYY-MM-DD HH:MM:SS``), 'author' (author
        name) and 'msg' (commit subject). Empty when git reports no commit.
    """
    logCmd = (
        f'git log --all --find-object={blobId}'
        ' --date=format:"%Y-%m-%d %H:%M:%S"'
        # %an (author name) matches both the 'author' key and the author
        # date (%ad) printed next to it; %cn would be the committer.
        ' --pretty=format:"%h -- %ad -- %an -- %s"'
    )
    commits = []
    for line in execute(logCmd).split('\n'):
        # Limit the split so a subject that itself contains " -- " is kept
        # whole instead of being cut at the first occurrence.
        tokens = line.split(' -- ', 3)
        if len(tokens) < 4:
            # Empty output (no matching commit) or an unparsable line.
            continue
        sha, date, author, msg = tokens
        commits.append({
            'sha': sha,
            'date': date,
            'author': author,
            'msg': msg,
        })
    return commits
def getBranches(sha):
    """Return the remote-tracking branches that contain commit *sha*.

    Branch names are returned with a leading ``origin/`` removed. The list is
    empty when no remote branch contains the commit.
    """
    output = execute(f'git branch --remote --contains={sha}')
    # Skip blank lines: when git prints nothing, splitting would otherwise
    # produce a single bogus empty branch name.
    return [
        line.strip().removeprefix('origin/')
        for line in output.split('\n')
        if line.strip()
    ]
def humanSize(num, suffix="B"):
    """Format *num* with binary (1024-based) unit prefixes, one decimal place.

    Args:
        num: The quantity to format (may be negative).
        suffix: Unit symbol appended after the prefix, ``"B"`` by default.

    Returns:
        A string such as ``"500.0KiB"`` or ``"1.0MiB"``.
    """
    for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]:
        # Decide on the value as it will be displayed: 1048575 bytes is
        # 1023.999 KiB, which would otherwise print as "1024.0KiB" rather
        # than moving on to "1.0MiB".
        if abs(round(num, 1)) < 1024.0:
            return f"{num:3.1f}{unit}{suffix}"
        num /= 1024.0
    return f"{num:.1f}Yi{suffix}"
def showLargeFile(largeFile):
    """Print one large file, the commits touching it and their branches.

    *largeFile* is a dict with the keys 'path', 'size' and 'blobId'. The
    output is the file line, one line per commit returned by getCommits, one
    line per branch returned by getBranches for that commit, and a final
    blank line.
    """
    readableSize = humanSize(largeFile["size"])
    print(f'{largeFile["path"]} ({readableSize})')
    for entry in getCommits(largeFile['blobId']):
        commitSha = entry["sha"]
        print(f' in commit {commitSha} by {entry["author"]} on {entry["date"]}: {entry["msg"]}')
        for branchName in getBranches(commitSha):
            print(f' in branch {branchName}')
    # Blank separator line after each file.
    print()
# Report header followed by an empty line, then one section per large file.
print(f'Files larger than {humanSize(sizeLimit)}:', end='\n\n')
for oversizedFile in getLargeFiles():
    showLargeFile(oversizedFile)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment