Skip to content

Instantly share code, notes, and snippets.

@waynegraham
Last active February 9, 2024 18:18
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save waynegraham/ed6d074267e60d7fef07 to your computer and use it in GitHub Desktop.
Save waynegraham/ed6d074267e60d7fef07 to your computer and use it in GitHub Desktop.
Get authors from svn repository
wsg4w = Wayne Graham <wsg4w@uva.edu>
$ cd path/to/svn_repo
$ svn log -q | awk -F '|' '/^r/ {sub("^ ", "", $2); sub(" $", "", $2); print $2" = "$2" <"$2">"}' | sort -u > authors.txt
#!/usr/bin/env bash
#
# Report the largest blobs stored in this repository's pack files.
#
# Usage:   run from the top level of a non-bare git working copy
#          (the script reads .git/objects/pack/ relative to the current dir).
#
# Outputs (all written to the current directory, overwritten on each run):
#   allfileshas.txt  "<sha> <path>" for every object reachable from any ref,
#                    sorted by path
#   bigobjects.txt   `git verify-pack -v` lines for non-deltified blobs,
#                    sorted by object size, biggest first
#   bigtosmall.txt   "<sha> <size> <path>" for those blobs, biggest first

set -euo pipefail

# Only clear the screen when attached to a terminal; never abort because of it.
if [[ -t 1 ]]; then
  clear || true
fi

echo "Finding all objects in the repo..."
git rev-list --objects --all | sort -k 2 > allfileshas.txt

echo "Generating the SHA hashes and sorting them biggest to smallest..."
# Repack first so that every object lives in a pack and shows up below.
git gc
# The pattern keeps only 5-column blob lines (sha, type, size, size-in-pack,
# offset). grep exits 1 when nothing matches; tolerate that under pipefail,
# but still propagate real grep errors (exit status 2).
git verify-pack -v .git/objects/pack/pack-*.idx \
  | { grep -E '^\w+ blob\W+[0-9]+ [0-9]+ [0-9]+$' || [[ $? -eq 1 ]]; } \
  | sort -k 3,3nr > bigobjects.txt

echo "Generate object SHAs"
# Join the two listings on the SHA in one awk pass instead of running two
# greps per object. The first file fills a sha -> path table (everything
# after the first space, so paths containing spaces survive intact); the
# second file is printed in its existing biggest-first order. Blobs that are
# packed but not reachable from any ref get an empty path.
awk '
  FILENAME == ARGV[1] {
    sep = index($0, " ")
    if (sep > 0) {
      path[$1] = substr($0, sep + 1)
    }
    next
  }
  { print $1, $3, path[$1] }
' allfileshas.txt bigobjects.txt > bigtosmall.txt

echo "Done."
# printf is used below because bash's echo does not expand \n or \t.
printf '%s\n\n' "Look at the bigtosmall.txt file for large files."
printf '%s\n\n' "You can remove any large files from your repo history with:"
printf '\t %s\n' "git filter-branch --prune-empty --index-filter 'git rm -rf --cached --ignore-unmatch MY-BIG-DIRECTORY-OR-FILE' --tag-name-filter cat -- --all"
printf '\n%s\n' "You can then compress it by cloning the repo without hard links:"
printf '\t %s\n' "git clone --no-hardlinks file:///Users/yourUser/your/full/repo/path repo-clone-name"
$ git rev-list --objects --all | grep 17ccd45
17ccd45824bb4cb1e1c8b03e5780fa31175c18ab trunk/branch/cocoon/html/Essex/vol2/gifs/gifs.tar
$ git log --oneline --branches -- trunk/branch/cocoon/html/Essex/vol2/gifs/gifs.tar
...
ddb3b1e Second commit
$ git filter-branch --index-filter 'git rm --ignore-unmatch --cached trunk/branch/cocoon/html/Essex/vol2/gifs/gifs.tar' -- ddb3b1e^..
$ git clone --no-hardlinks file:///Users/yourUser/your/full/repo/salem salem-smaller
$ rm -Rf .git/refs/original
$ rm -Rf .git/logs/
$ git pack-refs --prune
$ git reflog expire --all --expire-unreachable=0
$ git repack -A -d
$ git gc --prune=now --aggressive
$ git filter-branch --prune-empty --index-filter 'git rm -rf --cached --ignore-unmatch trunk/branch/cocoon/html' --tag-name-filter cat -- --all
wsg4w = Wayne Graham <wsg4w@uva.edu>
$ git svn clone https://subversion.lib.virginia.edu/repos/salem -T trunk/branch -A authors.txt --no-metadata
$ mkdir -p /tmp/salem
$ svnadmin create /tmp/salem
$ printf '#!/bin/sh\nexit 0\n' > /tmp/salem/hooks/pre-revprop-change
$ chmod +x /tmp/salem/hooks/pre-revprop-change
$ svnsync init file:///tmp/salem https://subversion.lib.virginia.edu/repos/salem
$ svnsync sync file:///tmp/salem
17ccd45824bb4cb1e1c8b03e5780fa31175c18ab 48199680 trunk/branch/cocoon/html/Essex/vol2/gifs/gifs.tar
d726f0a0cab047838e3405ad59d3c5399f42db87 12300550 trunk/branch/cocoon/html/maps/DHS/danvers_hist_soc/put_hse2.tif
06a4076cac85350be52261a8f11df0ecb42d6696 10610964 trunk/branch/cocoon/html/maps/images/musick_nurse.tif
6f9cbe6fa3fd702a70d666160329ef1176dd4a07 8042973 trunk/branch/cocoon/images/small/casey.tif
5b7e8c63a0bacd3dc2ab92db2d1d1cbc2359e69c 4715942 trunk/branch/cocoon/html/archives/essex/eia/large/eia22r.jpg
20ea6bb7b466cd4ba4716834bae7507989ff88b7 3861655 trunk/branch/cocoon/html/archives/essex/eia/large/eia06r.jpg
8a37fabb82418c6e6b07abf08821a053b2dc4b11 3770686 trunk/branch/cocoon/html/archives/essex/eia/large/eia13r.jpg
5fecd828115d3909cbe70de0be3936f96fb61868 3708386 trunk/branch/cocoon/html/maps/DHS/danvers_hist_soc/Summerhouse
$ git svn clone file:///tmp/salem -T trunk/branch -A authors.txt --no-metadata
$ git verify-pack -v .git/objects/pack/pack-*.idx | sort -k 3 -n | tail -5
5b7e8c63a0bacd3dc2ab92db2d1d1cbc2359e69c blob 4715942 4715522 2077929726
f3e135fd90caa6a05a1da13a2afc60c8a0af1063 blob 4743461 1703778 6751124
6f9cbe6fa3fd702a70d666160329ef1176dd4a07 blob 8042973 7227900 1043663492
d18b98c09c0dcbf9edc2f6ccf91672a399c8a79d blob 9662999 2747934 8477062
17ccd45824bb4cb1e1c8b03e5780fa31175c18ab blob 48199680 47913744 93724263
$ git count-objects -v
count: 5414
size: 41548
in-pack: 40222
packs: 1
size-pack: 2164015
prune-packable: 0
garbage: 0
size-garbage: 0
$ git count-objects -v
count: 0
size: 0
in-pack: 32347
packs: 1
size-pack: 1978880
prune-packable: 0
garbage: 0
size-garbage: 0
$ git count-objects -v
count: 0
size: 0
in-pack: 24334
packs: 1
size-pack: 966726
prune-packable: 0
garbage: 0
size-garbage: 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment