Last active
December 28, 2015 22:58
-
-
Save bzz/7574958 to your computer and use it in GitHub Desktop.
Searches for (and remove) large files from git repository
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#Lists all large files in a git repository for the given branch.
#Needs to be run from a repository subdirectory.
#Assumes git and perl are available.
# | |
#http://blog.jessitron.com/2013/08/finding-and-removing-large-files-in-git.html | |
# and http://git-scm.com/book/ch6-4.html for repo history rewrite explanation
# | |
#To get aggregated statistics on output use:
#$ cat large_files_dev.txt | awk '{t+=$1} END {print t/1024/1024 "Mb"}' | |
# | |
#To actually re-write the history use: (see https://help.github.com/articles/remove-sensitive-data) | |
#$ git filter-branch -d ~/tmp --index-filter \ | |
# 'git rm --cached --ignore-unmatch \ | |
# `cat /Users/alex/Documents/_NFLabs/peloton-master/large_files_all_branches.txt | cut -d " " -f 2` ' \ | |
# --prune-empty --tag-name-filter cat -- --all | |
# | |
#Pro-tip: mount ~/tmp to tmpfs in RAM | |
# | |
#Make sure we filtered all refs | |
# for file in `cat large_files_master.txt | cut -d " " -f 2`; do git log --name-only --follow --all -- $file; done | |
# | |
#To cleanup the repository use: | |
# either clean clone from it | |
# or | |
#$ rm -rf .git/refs/original/ | |
#$ git reflog expire --expire=now --all | |
#$ git gc --prune=now | |
#$ git gc --aggressive --prune=now | |
#######################################
# Print a short usage message and terminate the script.
# Arguments: none (uses $0 as the script name in the message)
# Outputs:   usage text on stderr, so it never mixes with data on stdout
# Returns:   does not return; exits the shell with status 1
#######################################
usage() {
  {
    echo "Usage: $0 <branch-name>"
    echo "Lists all large files for the given branch-name"
    echo "Pro-tip: use --all as a branch name"
  } >&2
  exit 1
}
# A branch name (or a rev-list option such as --all) is mandatory:
# print the usage text and exit when no argument was supplied.
[[ $# -eq 0 ]] && usage

# Make a pipeline fail when any of its stages fails, not only the last one.
set -o pipefail

# An explicitly empty argument ("") falls back to master.
BRANCH="${1:-master}"

# Smallest file size, in bytes, that is reported as "large".
# The default matches the historical cut-off (sizes of 7 or more digits);
# it can be overridden from the environment.
MIN_SIZE_BYTES="${MIN_SIZE_BYTES:-1000000}"

# Branch names may contain "/" (e.g. feature/foo); replace it so the report
# name is never interpreted as a path into a directory that does not exist.
OUTPUT_FILE="large_files_${BRANCH//\//_}.txt"

# Validate the ref before touching the report, so a mistyped branch name
# does not clobber an existing report. BRANCH is intentionally not preceded
# by "--": it may be a rev-list option such as --all.
if ! git rev-list --max-count=1 "${BRANCH}" >/dev/null 2>&1; then
  echo "Error: cannot list revisions for \"${BRANCH}\"" \
    "(unknown branch, or not inside a git repository)" >&2
  exit 1
fi

echo "Listing large files in branch \"${BRANCH}\" ..."

# Pipeline stages:
#   1. list the tree of every revision, with object sizes (ls-tree -l);
#   2. keep only blobs of at least MIN_SIZE_BYTES, parsed by field rather
#      than by fixed column offsets, emitting "<size>TAB<path>";
#   3. drop duplicate (size, path) pairs seen in several revisions;
#   4. sum the distinct sizes recorded for each path;
#   5. sort by total size, largest first.
# The report is overwritten (not appended to) so that re-running the script
# does not duplicate entries.
if ! git rev-list "${BRANCH}" \
  | while IFS= read -r rev; do
      git ls-tree -lr "${rev}" || exit 1
    done \
  | awk -F'\t' -v min="${MIN_SIZE_BYTES}" '
      {
        # $1 is "<mode> <type> <object> <size>", $2 is the path.
        split($1, meta, " ")
        if (meta[2] == "blob" && meta[4] + 0 >= min + 0) {
          print meta[4] "\t" $2
        }
      }' \
  | sort -u \
  | perl -e '
      while (<>) {
        chomp;
        my ($size, $path) = split(/\t/, $_, 2);
        $sums{$path} += $size;
      }
      print "$sums{$_} $_\n" for (keys %sums);
    ' \
  | sort -rn > "${OUTPUT_FILE}"; then
  echo "Error: failed to list large files in \"${BRANCH}\"" >&2
  exit 1
fi

echo "Done. See ${OUTPUT_FILE} for results."
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment