Last active
February 8, 2022 14:53
-
-
Save afeblot/09418e0d6efbe6a0e451dc26c630acc2 to your computer and use it in GitHub Desktop.
In a git repo, lists all files larger than a given size, displays in which commit they appear, and which branches these commits are part of.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3
# Size threshold in bytes (500 * 1024); getLargeFiles() keeps only blobs
# whose size is strictly greater than this value.
sizeLimit = 500*1024 # bytes
import subprocess | |
import sys | |
def execute(shellCmd):
    """Run *shellCmd* through the shell and return its standard output.

    The output is decoded as UTF-8 text. A non-zero exit status raises
    ``subprocess.CalledProcessError``.
    """
    completed = subprocess.run(
        shellCmd,
        shell=True,
        encoding='utf8',
        stdout=subprocess.PIPE,
        check=True,
    )
    return completed.stdout
def getLargeFiles():
    """Return every blob in the repository that is larger than ``sizeLimit``.

    Runs a shell pipeline through ``execute`` that lists all objects reachable
    from any ref, keeps only blobs, and sorts them numerically on the size
    field. Each output line has the form ``<blobId> <size> <path>``.

    Returns:
        A list of dicts with the keys ``'blobId'`` (str), ``'size'`` (int,
        bytes) and ``'path'`` (str), in the order produced by the pipeline.
        The list is empty when the repository contains no blobs.
    """
    pipeline = (
        'git rev-list --objects --all'
        ' | git cat-file --batch-check="%(objecttype) %(objectname) %(objectsize) %(rest)"'
        ' | sed -n "s/^blob //p"'
        ' | sort --numeric-sort --key=2'
    )
    largeFiles = []
    for line in execute(pipeline).split('\n'):
        # Skip the empty string left by the trailing newline, and the single
        # empty entry produced when the pipeline prints nothing at all.
        if not line:
            continue
        # Split on the first two spaces only: the path is the remainder of
        # the line and may itself contain spaces.
        blobId, rawSize, path = line.split(' ', 2)
        size = int(rawSize)
        if size > sizeLimit:
            largeFiles.append({
                'blobId': blobId,
                'size': size,
                'path': path,
            })
    return largeFiles
def getCommits(blobId):
    """Return the commits whose diff adds or removes the blob *blobId*.

    Uses ``git log --all --find-object`` through ``execute`` and parses one
    commit per output line, formatted as ``%h -- %ad -- %cn -- %s``.

    Args:
        blobId: Object id of the blob to look for.

    Returns:
        A list of dicts with the keys ``'sha'`` (abbreviated hash), ``'date'``
        (formatted ``YYYY-MM-DD HH:MM:SS``), ``'author'`` and ``'msg'``
        (subject line). The list is empty when git reports no commit.

    NOTE(review): the ``'author'`` value comes from ``%cn``, which git
    documents as the committer name, while the date comes from ``%ad`` (author
    date). Kept as is so the output does not change -- confirm the intent.
    """
    logOutput = execute(f'git log --all --find-object={blobId} --date=format:"%Y-%m-%d %H:%M:%S" --pretty=format:"%h -- %ad -- %cn -- %s"')
    commits = []
    for line in logOutput.split('\n'):
        # An empty log (no matching commit) yields a single empty string;
        # skip it instead of indexing into missing fields.
        if not line.strip():
            continue
        # Limit the split to three separators so a subject that itself
        # contains ' -- ' is kept whole in the last field.
        tokens = line.split(' -- ', 3)
        commits.append({
            'sha': tokens[0],
            'date': tokens[1],
            'author': tokens[2],
            'msg': tokens[3],
        })
    return commits
def getBranches(sha):
    """Return the remote-tracking branches that contain commit *sha*.

    Runs ``git branch --remote --contains`` through ``execute``. Each output
    line is stripped of surrounding whitespace and of a leading ``origin/``.

    Args:
        sha: Commit hash to look up.

    Returns:
        A list of branch names; empty when no remote branch contains the
        commit.
    """
    listing = execute(f'git branch --remote --contains={sha}')
    stripped = (line.strip() for line in listing.split('\n'))
    # Drop blank entries: empty git output would otherwise be reported as a
    # single branch with an empty name.
    return [name.removeprefix('origin/') for name in stripped if name]
def humanSize(num, suffix="B"):
    """Format *num* as a human-readable size using binary (1024-based) prefixes.

    The value is divided by 1024 until its magnitude drops below 1024 or the
    ``Zi`` prefix has been passed, in which case ``Yi`` is used. The result
    carries one decimal place, followed by the prefix and *suffix*.
    """
    prefixes = ("", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi")
    value = num
    position = 0
    # Written as "not (... < ...)" so values that compare false either way
    # (NaN) keep walking up the prefixes.
    while position < len(prefixes) and not (abs(value) < 1024.0):
        value = value / 1024.0
        position += 1
    if position == len(prefixes):
        return f"{value:.1f}Yi{suffix}"
    return f"{value:3.1f}{prefixes[position]}{suffix}"
def showLargeFile(largeFile):
    """Print a report for one large file.

    Prints the file's path and human-readable size, then one line per commit
    returned by ``getCommits`` for its blob, each followed by one line per
    branch returned by ``getBranches`` for that commit. Ends with a blank line.

    Args:
        largeFile: Dict with the keys ``'path'``, ``'size'`` and ``'blobId'``.
    """
    readableSize = humanSize(largeFile["size"])
    print(f'{largeFile["path"]} ({readableSize})')
    for entry in getCommits(largeFile['blobId']):
        shortSha = entry["sha"]
        print(f' in commit {shortSha} by {entry["author"]} on {entry["date"]}: {entry["msg"]}')
        for branchName in getBranches(shortSha):
            print(f' in branch {branchName}')
    print()
# Script body: print a header naming the size threshold, then report each
# large file found by getLargeFiles() in the order it returns them.
print(f'Files larger than {humanSize(sizeLimit)}:\n')
for largeFile in getLargeFiles():
    showLargeFile(largeFile)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment