Created
April 16, 2016 22:26
-
-
Save juliendufresne/52337603011827471a1e6fd1f695e108 to your computer and use it in GitHub Desktop.
Remove large files and directories from git history
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Find the biggest blobs stored in a repository's history.
# source: https://stackoverflow.com/questions/10622179/how-to-find-identify-large-files-commits-in-git-history/20460121#20460121
#
# Result: ${WORKING_DIR}/bigtosmall.txt with one "<SHA> <size> <filepath>"
# line per blob, largest first.
REPO_URL="git@domain.tld:your-repo-here.git"

# Work in a throw-away directory; stop immediately if it cannot be created or
# entered so nothing is written into the caller's current directory.
WORKING_DIR=$(mktemp -d) || exit 1
cd "${WORKING_DIR}" || exit 1
git clone --mirror "${REPO_URL}" source.git || exit 1
cd source.git || exit 1

# Every object reachable from any ref as "<SHA> <path>", sorted by path.
git rev-list --objects --all | sort -k 2 > ../allfileshas.txt

# Repack first so the pack index covers the repository, then keep only the
# blob lines of the form "<SHA> blob <size> <size-in-pack> <offset>",
# sorted by size, largest first.
git gc || exit 1
git verify-pack -v objects/pack/pack-*.idx \
  | grep -E "^\w+ blob\W+[0-9]+ [0-9]+ [0-9]+$" \
  | sort -k 3 -n -r > ../bigobjects.txt

cd .. || exit 1

# Join both lists on the SHA in a single pass. The path is everything after
# the first space of the rev-list line, so paths containing spaces are kept
# whole. The order of bigobjects.txt (largest first) is preserved.
awk '
  FILENAME == ARGV[1] {
    sep = index($0, " ")
    if (sep > 0) {
      path[substr($0, 1, sep - 1)] = substr($0, sep + 1)
    }
    next
  }
  { print $1, $3, path[$1] }
' allfileshas.txt bigobjects.txt > bigtosmall.txt

echo "Now you can analyze file ${WORKING_DIR}/bigtosmall.txt"
echo "file content: <SHA> <size> <filepath>"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Settings for the history-cleanup script.

# Scratch directory that receives the mirror clone and its backup copy.
# Abort if it cannot be created: with an empty WORKING_DIR the later steps
# would run in whatever directory the script was started from.
WORKING_DIR=$(mktemp -d) || { printf '%s\n' "mktemp failed" >&2; exit 1; }

# Repository whose history is rewritten.
SOURCE_REPO_URL="git@domain.tld:your-repo-here.git"

# This will allow you to have a backup in case you removed something not supposed to be removed
# You may need to create the repository beforehand (see below)
# The path gets a timestamp suffix so every run targets a fresh backup repository.
BACKUP_REPO_PATH="your-username/backup_$(date +%Y%m%d%H%M%S)"
BACKUP_REPO_URL="git@domain.tld:${BACKUP_REPO_PATH}.git"
# Print a top-level progress heading in yellow, underlined with "=".
# Arguments: $1 - heading text
# Outputs:   two lines on stdout: the text, then one "=" per character of it
function step
{
    local str=$1
    local underline
    # Build a run of spaces as long as the heading, then turn each into "=".
    printf -v underline '%*s' "${#str}" ''
    # The format is reused for each argument: heading line, then underline.
    printf '\e[0;33m%s\e[0;39m\n' "$str" "${underline// /=}"
}
# Print a second-level progress heading in yellow, underlined with "-".
# Arguments: $1 - heading text
# Outputs:   two lines on stdout: the text, then one "-" per character of it
function substep
{
    local str=$1
    local underline
    # Build a run of spaces as long as the heading, then turn each into "-".
    printf -v underline '%*s' "${#str}" ''
    # The format is reused for each argument: heading line, then underline.
    printf '\e[0;33m%s\e[0;39m\n' "$str" "${underline// /-}"
}
# Main flow: mirror-clone the source repository, push an untouched copy to the
# backup repository in the background, strip the unwanted paths from every
# commit, and only then push the rewritten history back to the source.
step "working directory: ${WORKING_DIR}"
cd "${WORKING_DIR}" || exit 1

step "Clone source repository ${SOURCE_REPO_URL}"
git clone --mirror "${SOURCE_REPO_URL}" source.git || exit 1

step "backup to ${BACKUP_REPO_URL}"
substep "Create repository"
# My need was to create a private Bitbucket repository. You can omit this part if you have created your backup repository manually
# For github: https://developer.github.com/v3/repos/#create
# Only the response status line(s) are shown. Matching on "HTTP/" rather than
# "HTTP/1.1" keeps the status visible when the server answers over HTTP/2.
curl -X POST -s -D - -o /dev/null -u 'your-bitbucket-user:your-bitbucket-pass' -H "Content-Type: application/json" \
    "https://api.bitbucket.org/2.0/repositories/${BACKUP_REPO_PATH}" \
    -d '{"scm": "git", "is_private": "true", "fork_policy": "no_public_forks" }' | grep '^HTTP/'

substep "copy local repository"
cp -r source.git backup.git || exit 1
cd backup.git || exit 1
git remote set-url --push origin "${BACKUP_REPO_URL}"

substep "Deep copy of ${SOURCE_REPO_URL} to ${BACKUP_REPO_URL} - run in background"
# Runs concurrently with the history rewrite below; its PID is kept so the
# result can be checked before anything is pushed to the source repository.
git push &>/dev/null &
backup_push_pid=$!

step "cleanup repository ${SOURCE_REPO_URL}"
cd "${WORKING_DIR}/source.git" || exit 1

substep "filter-branch"
# This is where you need to specify the file/directory you want to remove from your git history.
# Paths must be given relative to the repository root.
# Ex: if I want to remove myfile.txt located in <git-root>/src/subdir/myfile.txt, I need to put src/subdir/myfile.txt
git filter-branch --prune-empty --index-filter 'git rm -rf --cached --ignore-unmatch <put all your directories and files to remove here> >/dev/null' --tag-name-filter cat -- --all || exit 1

substep "finalize"
# Drop the refs/original backups written by filter-branch and expire the
# reflog so that gc can actually discard the removed objects.
git for-each-ref --format='delete %(refname)' refs/original | git update-ref --stdin
git reflog expire --expire=now --all
git gc --prune=now --aggressive

step "wait for all background process to finish"
# Do not overwrite the source history unless the backup push succeeded.
printf '%s\n' "wait for process ${backup_push_pid}"
if ! wait "${backup_push_pid}"; then
    printf '%s\n' "backup push to ${BACKUP_REPO_URL} failed; source repository left untouched" >&2
    exit 1
fi

substep "push to repository ${SOURCE_REPO_URL}"
git push
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment