-
-
Save steffen/30e537a1ce6f754d6224456850ce6c35 to your computer and use it in GitHub Desktop.
git-du script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
#
# Generate a listing of all paths ever used in the repository, along with the
# disk space used by the path throughout the entire history. Note that sizes
# for trees are cumulative; they include the sizes of all of the paths below
# them, in addition to the tree storage itself. All sizes are in bytes, and
# reflect git's delta and zlib compression.
#
# One caveat is that this is just the _current_ on-disk size. The on-disk size
# of each object may change if git repacks and chooses different delta bases,
# for example. Likewise, the mapping of paths to objects may change (e.g., a
# given blob object may appear at multiple paths, and we assign it to only one
# path slot). So take it as a rough guide; removing particular paths
# from your history may not remove exactly that many bytes from the repository
# size.
use strict;
use warnings;

# Size accumulators. Commits and tags are each kept as one flat total; every
# other object is filed under its path in a tree of nodes shaped like
# { size => BYTES, child => { NAME => node, ... } }.
my $commits = { size => 0 };
my $tags    = { size => 0 };
my $tree    = { size => 0 };

# rev-list prints each reachable object followed by its path (if any);
# cat-file echoes that path back as %(rest), prefixed with the object's
# on-disk size and its type.
open(my $git, '-|',
     'git rev-list --objects --all |
      git cat-file --batch-check="%(objectsize:disk) %(objecttype) %(rest)"
     ') or die "cannot start git pipeline: $!\n";

while (my $line = <$git>) {
    # Ignore anything that is not "<size> <type> <path>" (for example
    # cat-file's "<object> missing" report) instead of counting it as a
    # zero-sized, nameless object.
    my ($size, $type, $name) = $line =~ /^(\d+) (.*?) (.*)/
        or next;

    if ($type eq 'commit') {
        add_to_bucket($commits, '', $size);
    } elsif ($type eq 'tag') {
        add_to_bucket($tags, '', $size);
    } else {
        add_to_bucket($tree, $name, $size);
    }
}

# Closing a piped handle waits for the child and reports its exit status;
# refuse to print a misleading (typically all-zero) report if git failed.
unless (close($git)) {
    die $! ? "error reading from git pipeline: $!\n"
           : "git pipeline failed (wait status $?)\n";
}

show_bucket($commits, 'COMMITS', 0);
show_bucket($tags, 'TAGS', 0);
show_bucket($tree, '/', 0);
# Add $size bytes to $node and to every node along the '/'-separated path
# $name beneath it, creating intermediate nodes on demand.
#
#   $node - hash ref shaped like { size => N, child => { NAME => node } };
#           modified in place.
#   $name - path of the object relative to $node; '' (or undef) charges only
#           $node itself.
#   $size - number of bytes to add; undef counts as 0.
sub add_to_bucket {
    my ($node, $name, $size) = @_;

    $size = 0 unless defined $size;
    my @parts = defined $name ? split('/', $name) : ();

    # The bucket itself always receives the full size ...
    my $cursor = $node;
    $cursor->{size} += $size;

    # ... and so does each path component below it, so that directory
    # totals are cumulative.
    for my $part (@parts) {
        $cursor->{child}->{$part} ||= { size => 0 };
        $cursor = $cursor->{child}->{$part};
        $cursor->{size} += $size;
    }
}
# Print $node and everything below it, one line per node, as
# "<size right-aligned in 10 columns> <path>", indented by one space per
# nesting level. Children are listed in sorted name order, each followed
# immediately by its own descendants.
#
#   $node   - hash ref shaped like { size => N, child => { NAME => node } };
#             it is only read, never modified.
#   $name   - label printed for $node; children of a node labelled '/' are
#             printed without a leading slash, all others as "$name/$child".
#   $indent - number of spaces printed before $node's own line.
sub show_bucket {
    my ($node, $name, $indent) = @_;

    # Explicit stack instead of recursion: each entry is [node, label, depth].
    my @pending = ([ $node, $name, $indent ]);
    while (my $entry = pop @pending) {
        my ($bucket, $label, $depth) = @$entry;

        print " " x $depth;
        printf "%10s %s\n", $bucket->{size}, $label;

        # Fall back to an empty hash so that merely printing a leaf does not
        # autovivify a 'child' entry inside it.
        my $children = $bucket->{child} || {};

        # Push in reverse so the alphabetically first child is popped (and
        # therefore printed, with its whole subtree) first.
        for my $child (reverse sort keys %$children) {
            my $child_name = $label eq '/' ? $child : "$label/$child";
            push @pending, [ $children->{$child}, $child_name, $depth + 1 ];
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment