Last active
March 28, 2018 03:41
-
-
Save jcfr/4348af13d2c8931daeab4ff9ab73e14b to your computer and use it in GitHub Desktop.
Shell script listing the N largest files found in the history of a git-versioned project
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
set -eo pipefail

#
# This script will list the N largest files found in the history of a Git project.
#
# References:
#  * https://docs.acquia.com/article/removing-large-files-git-without-losing-history
#  * https://stackoverflow.com/questions/10622179/how-to-find-identify-large-files-commits-in-git-history
#  * https://git-scm.com/book/en/v2/Git-Internals-Packfiles
#

# Script name as shown in the usage message. "$0" is quoted so an install
# path containing spaces does not break basename.
PROG=$(basename -- "$0")
#------------------------------------------------------------------------------- | |
# Print "ERROR: <message>" followed by a blank line on stderr.
# Arguments: the words of the message; they are joined with single spaces.
# printf with a fixed format keeps the message literal (no word splitting,
# no glob expansion, no backslash interpretation).
err() { printf 'ERROR: %s\n\n' "$*" >&2; }
# Report an error through err and terminate the script with status 1.
# Arguments: the words of the message, forwarded to err unchanged.
die() { err "$@"; exit 1; }
# Print the usage message on stderr.
# Globals: PROG (read) - script name substituted into the usage line.
# Note: this function shadows the bash builtin of the same name.
help() {
cat >&2 <<ENDHELP
Usage: ${PROG} N [options]
List the N largest files found in the history of a Git project.
Options:
-h, --human-readable print human readable sizes(e.g., 1K 234M 2G)
-t, --table pretty print results as a table instead of comma separated lines
ENDHELP
}
#------------------------------------------------------------------------------- | |
# The pack indexes are read from ./.git/objects/pack further down, so the
# script must be started from the top-level directory of the repository.
# Stop here instead of only printing the error and carrying on.
if [[ ! -d .git ]]; then
  die "Execute the script at the root of a git-versioned project"
fi
# Command-line parsing.
#   table=1           -> aligned table output instead of comma separated lines
#   human_readable=1  -> sizes are formatted by numfmt (e.g. 1.2MiB)
#   N                 -> number of entries to list (first positional argument)
table=0
human_readable=0
N=""
while [[ $# != 0 ]]; do
  case "$1" in
    --human-readable|-h)
      human_readable=1
      ;;
    --table|-t)
      table=1
      ;;
    -*)
      err "Unknown option \"$1\""
      help
      exit 1
      ;;
    *)
      # The usage is "N [options]": keep scanning after the positional so
      # that options written after N are honoured. Only the first
      # positional is used; any further ones are ignored.
      if [[ -z "$N" ]]; then
        N=$1
      fi
      ;;
  esac
  shift
done

if [[ -z "$N" ]]; then
  err "Missing N option"
  help
  exit 1
fi
# Print a byte count, optionally in human readable form.
# Globals:   human_readable (read) - 1 selects numfmt formatting; any other
#            value (or unset) prints the argument unchanged.
# Arguments: $1 - size in bytes
# Outputs:   the size on stdout; in human readable mode it is an IEC value
#            with a "B" suffix, right-aligned by numfmt in a 12 character
#            field.
format_size() {
  if [[ "${human_readable:-0}" == 1 ]]; then
    printf '%s\n' "$1" | numfmt --to=iec-i --suffix=B --padding=12
  else
    printf '%s\n' "$1"
  fi
}
# Print one result row.
# Globals:   table (read) - 1 selects aligned columns, anything else CSV.
# Arguments: $1 size, $2 size in pack, $3 object id, $4 path
# Outputs:   one line on stdout.
# "$@" is quoted so that a path containing spaces stays a single field;
# unquoted, the extra words made printf reuse the format and emit
# additional bogus rows.
display_line() {
  if [[ "$table" == 1 ]]; then
    printf "%12s %12s %-40s %s\n" "$@"
  else
    printf "%s,%s,%s,%s\n" "$@"
  fi
}
# "<object id> <path>" for every object reachable from any ref; used to map
# the object ids reported by verify-pack back to a path in the history.
all_objects=$(git rev-list --all --objects)

display_line "size" "pack_size" "sha" "location"

# verify-pack -v prints one line per packed object:
#   <object id> <type> <size> <size in pack> <offset> [<depth> <base id>]
# followed by summary lines ("non delta: ...", "chain length ...",
# "<pack>: ok"). The grep keeps only the per-object lines so a summary line
# can never be listed as if it were a file.
git verify-pack -v .git/objects/pack/pack-*.idx \
  | grep -E '^[0-9a-f]{40,64} ' \
  | sort -k 3 -nr \
  | head -n "$N" \
  | while read -r sha _type raw_size raw_pack_size _rest; do
  size=$(format_size "$raw_size")
  compressed_size=$(format_size "$raw_pack_size")
  # format_size may left-pad its output; drop the leading blanks so the
  # comma separated output stays clean (the table format pads on its own).
  size=${size#"${size%%[![:space:]]*}"}
  compressed_size=${compressed_size#"${compressed_size%%[![:space:]]*}"}

  # Look the object up in the reachable-object list. A packed object that is
  # not reachable from any ref has no entry; "|| true" keeps that from
  # aborting the loop under "set -e", and the location is then left empty.
  match=$(grep -m 1 -- "^${sha}" <<<"${all_objects}" || true)
  # Everything after the object id is the path; keeping the whole remainder
  # preserves paths that contain spaces.
  location=${match#"${sha}"}
  location=${location# }

  display_line "$size" "$compressed_size" "$sha" "$location"
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example of output:
This will most likely change after the Slicer project moves away from
git-svn
and uses only git. Indeed, the history will be trimmed and filtered