Last active
April 12, 2024 10:57
-
-
Save mloskot/a648d11468f56e3b7a4f3ffb5fe7a9c0 to your computer and use it in GitHub Desktop.
Count Git LFS objects and their sizes, total and per file type
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# Counts Git LFS objects, in total and per file type, and sums their sizes.
import os
import re
import subprocess
import sys
import tempfile  # unused; retained from the original import list

# Multipliers for the size units handled by this script. They are decimal
# (1 KB = 1000 B), so the report below divides by 10**6 to print real MB.
TO_BYTES = {
    "B": 1,
    "KB": 10**3,
    "MB": 10**6,
    "GB": 10**9,
    "TB": 10**12,
    "PB": 10**15,
}
BYTES_PER_MB = 10.0**6

# One line of `git lfs ls-files --long --size`:
#     <oid> <marker> <path> (<size> <unit>)
# The marker is `-` or `*` (see git-lfs-ls-files(1)). The path group is
# greedy so that a path which itself contains " (" or ")" is still split at
# the last parenthesised group, which is the size.
LS_FILES_LINE = re.compile(
    r'^(?P<oid>[0-9a-fA-F]+) [-*] (?P<name>.+) '
    r'\((?P<size>[0-9.]+) (?P<unit>[A-Za-z]+)\)$'
)


def parse_ls_files_line(line):
    """Return ``(extension, size_in_bytes)`` for one ls-files output line.

    ``extension`` is what ``os.path.splitext`` yields for the path (an empty
    string when the path has none). Raises ``ValueError`` if the line does
    not have the expected shape or uses a unit missing from ``TO_BYTES``.
    """
    match = LS_FILES_LINE.match(line.rstrip('\r\n'))
    if match is None:
        raise ValueError(
            'unexpected `git lfs ls-files` line: {0!r}'.format(line))
    unit = match.group('unit')
    if unit not in TO_BYTES:
        raise ValueError('unknown size unit {0!r} in line: {1!r}'.format(
            unit, line))
    ext = os.path.splitext(match.group('name'))[1]
    # round() rather than truncation: the float product can land a hair
    # below the intended integer.
    size = int(round(float(match.group('size')) * TO_BYTES[unit]))
    return ext, size


def summarize_lfs_objects(lines):
    """Aggregate ls-files lines into ``{ext: {'count': n, 'size': bytes}}``.

    Blank lines are ignored; malformed lines raise ``ValueError`` so that a
    parsing problem cannot silently produce an undercount.
    """
    total = {}
    for line in lines:
        if not line.strip():
            continue
        ext, size = parse_ls_files_line(line)
        entry = total.setdefault(ext, {'count': 0, 'size': 0})
        entry['count'] += 1
        entry['size'] += size
    return total


def format_summary(total):
    """Return the report lines: one per extension (sorted), then the total."""
    report = []
    for ext in sorted(total):
        entry = total[ext]
        report.append('{}:\tcount: {}\tsize: {:.2f} MB'.format(
            ext, entry['count'], entry['size'] / BYTES_PER_MB))
    count = sum(entry['count'] for entry in total.values())
    size = sum(entry['size'] for entry in total.values()) / BYTES_PER_MB
    report.append('Total:\tcount: {}\tsize: {:.2f} MB'.format(count, size))
    return report


def main():
    """Run ``git lfs ls-files`` in the current directory and print a summary.

    Returns the process exit status: 0 on success, 1 if git could not be run
    or exited with an error.
    """
    print('Collecting sizes of Git LFS objects')
    try:
        # Capture the output directly instead of redirecting it into a
        # scratch file inside the working tree.
        listing = subprocess.check_output(
            ['git', 'lfs', 'ls-files', '--long', '--size'],
            universal_newlines=True)
    except (OSError, subprocess.CalledProcessError) as err:
        sys.stderr.write('git lfs ls-files failed: {0}\n'.format(err))
        return 1

    print('Calculating total size per file type')
    total = summarize_lfs_objects(listing.splitlines())
    if not total:
        print('no summary to print')
        return 0

    print('Git LFS objects summary:')
    for report_line in format_summary(total):
        print(report_line)
    return 0


if __name__ == '__main__':
    sys.exit(main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# `git lfs ls-files --size` reports human-friendly sizes such as "1.5 MB".
# This converts every size to bytes (decimal units, 1 KB = 1000 B) and lists
# the objects largest first, one per line, as "<bytes> <path> <oid>".
#
# The conversion is done in a single awk stage instead of chained sed
# rewrites: multiplying the parsed number handles fractional sizes correctly
# (a textual rewrite turns "1.5 MB" into "1000000.5"), and the size is taken
# from the trailing "(<size> <unit>)" group only, so paths containing spaces
# or parentheses, or starting with "B"/"KB"/"MB"/"GB", pass through intact.
git lfs ls-files --long --size \
  | awk '
      BEGIN {
        mult["B"] = 1;    mult["KB"] = 1e3;  mult["MB"] = 1e6
        mult["GB"] = 1e9; mult["TB"] = 1e12; mult["PB"] = 1e15
      }
      {
        oid = $1
        rest = $0
        # Drop the leading "<oid> <marker> "; the marker is "-" or "*".
        sub(/^[^ ]+ [-*] /, "", rest)
        # The size is the last parenthesised group on the line.
        if (!match(rest, / \([0-9.]+ [A-Za-z]+\)$/)) {
          print "skipping unrecognised line: " $0 > "/dev/stderr"
          next
        }
        path = substr(rest, 1, RSTART - 1)
        # RSTART + 2 skips " (", RLENGTH - 3 also drops the closing ")".
        split(substr(rest, RSTART + 2, RLENGTH - 3), size, " ")
        if (!(size[2] in mult)) {
          print "skipping unknown unit: " $0 > "/dev/stderr"
          next
        }
        printf "%.0f %s %s\n", size[1] * mult[size[2]], path, oid
      }' \
  | sort -k 1,1nr \
  | uniq
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Very inspiring.
I used `sort --key 1 --human-numeric-sort --reverse` to avoid the conversion (it requires removing the space between `$4` and `$5`, which is OK), and `git lfs ls-files --size $@` to be able to pass other parameters such as `--all` or `--long` (if need be).