Skip to content

Instantly share code, notes, and snippets.

@kergoth
Created March 21, 2011 19:11
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kergoth/880013 to your computer and use it in GitHub Desktop.
Save kergoth/880013 to your computer and use it in GitHub Desktop.
Clone a url, caching it, so all clones of a given url share the same core set of git objects
#!/bin/sh
#
# git-cached-clone
#
# Clone a url to a destination, as git clone does, but cache as well, such
# that all clones of a given url share the same core git objects, which saves
# disk space and reduces the load on the git server. This is transparent to
# the user of the cloned repository, as it still points at upstream, and
# future updates from that clone will use the original URL, not the cache.
#
# If you attempt to clone a URL while offline, and the url has been cached, it
# will still give you an entirely functional cloned repository, but its
# contents will be based upon the most recent cache update.
#
# This updates the cache for a given URL whenever you clone from that URL.
# Abort on the first unhandled command failure.
set -o errexit

# Directory holding the shared bare repositories used as the clone cache.
cachedir="${HOME}/.git-cache"
# Entry point: handle option flags, then clone URL into DEST via the cache.
#
# Arguments:
#   $@ - command line: [-l|-s|-u|-h] URL [DEST]
# Globals written:
#   url, dest, cache_url, cached, ret
# Exit status:
#   Exits with the cache-setup status when updating the cache failed and no
#   cached copy exists at the expected path; otherwise the status of
#   local_clone.
main () {
    process_options "$@" || usage
    shift $((OPTIND - 1))

    # Check the argument count rather than relying on a failing `shift`:
    # POSIX lets a non-interactive shell abort on an invalid shift (dash
    # does), in which case the usage text would never be printed.
    if [ $# -eq 0 ]; then
        usage
    fi
    url="$1"
    shift

    # Append ".git" unless the URL already ends in ".git" or ".git/".
    # A case pattern avoids the GNU-only "\?" regex extension and avoids
    # piping arbitrary data through echo.
    case "$url" in
        *.git|*.git/) ;;
        *) url="$url.git" ;;
    esac

    # DEST is optional; default to a name derived from the URL.
    dest="${1-}"
    if [ -z "$dest" ]; then
        dest="$(basename_url "$url")"
    fi

    cache_url="$(get_cache_url "$url")"

    # A failed cache update is only fatal when there is no cache to fall
    # back on; otherwise clone from whatever the cache already holds.
    ret=0
    cached="$(setup_cache "$cache_url")" || ret=$?
    if [ "$ret" -ne 0 ] && [ ! -e "$cached" ]; then
        exit "$ret"
    fi

    local_clone "$url" "$cache_url" "$cached" "$dest"
}
# Parse the single-letter flags in "$@".
#
# -l, -s and -u each run their cache action and then terminate the script
# with status 0. Returns 1 for -h or an unrecognized flag (which getopts
# reports as "?"), and 0 when no flag was given.
process_options () {
    while getopts lsuh opt; do
        # Help requests and unknown flags are signalled to the caller.
        if [ "$opt" = "h" ] || [ "$opt" = "?" ]; then
            return 1
        fi

        # Every remaining flag is a one-shot maintenance action.
        case "$opt" in
            l) list_cache ;;
            s) sanitize_cache ;;
            u) update_cache ;;
        esac
        exit 0
    done
}
# Print the remote URLs configured in every repository under $cachedir.
#
# Globals read: cachedir
# Output: a header line, then one " URL" line per configured remote.
list_cache () {
    echo "Cached repositories:"
    # Quote $cachedir so a cache path containing spaces is not word-split.
    for repo in "$cachedir"/*/; do
        # An unmatched glob is left as the literal pattern; skip it rather
        # than hand git a path that does not exist.
        [ -d "$repo" ] || continue
        git "--git-dir=$repo" config --get-regexp "remote\..*\.url" |
            while read -r entry url; do
                printf ' %s\n' "$url"
            done
    done
}
# Rewrite every cached remote URL to the form produced by proper_url.
#
# Globals read: cachedir
# Output: one "Changed url for ..." line per URL that was rewritten.
sanitize_cache () {
    # Quote $cachedir so a cache path containing spaces is not word-split.
    for repo in "$cachedir"/*/; do
        # An unmatched glob is left as the literal pattern; skip it.
        [ -d "$repo" ] || continue
        git "--git-dir=$repo" config --get-regexp "remote\..*\.url" |
            while read -r entry url; do
                adjusted_url="$(proper_url "$url")"
                if [ "$url" != "$adjusted_url" ]; then
                    # Update the exact config key that was read. Remotes in
                    # the cache are named after a hash of their URL (see
                    # bare_clone), so there is no "origin" remote to target
                    # with `git remote set-url origin`.
                    git "--git-dir=$repo" config "$entry" "$adjusted_url" &&
                        echo "Changed url for $repo to $adjusted_url"
                fi
            done
    done
}
# Fetch all remotes of every repository under $cachedir.
#
# Globals read: cachedir
# Returns: 0 when every fetch succeeded, otherwise the status of the most
#          recent failing fetch. A failure does not stop the remaining
#          repositories from being updated.
update_cache () {
    update_status=0
    # Quote $cachedir so a cache path containing spaces is not word-split.
    for repo in "$cachedir"/*/; do
        # An unmatched glob is left as the literal pattern; skip it.
        [ -d "$repo" ] || continue
        git "--git-dir=$repo" fetch --all || update_status=$?
    done
    return "$update_status"
}
# Write the help text to stderr and terminate the script with status 0.
usage () {
    # One argument per output line; the empty strings produce blank lines.
    printf '%s\n' \
        "Usage: git cached-clone URL [DEST]" \
        "" \
        " git cached-clone -l" \
        " List cloned repositories" \
        "" \
        " git cached-clone -s" \
        " Sanitize cloned repositories" \
        "" \
        " git cached-clone -u" \
        " Update cloned repositories" >&2
    exit 0
}
# Print the URL that should be used to populate the cache for $1.
#
# The URL is first normalized with proper_url. If it then starts with the
# value of a global url.<base>.insteadOf setting, that prefix is replaced
# by <base>; otherwise the normalized URL is printed unchanged.
#
# Arguments: $1 - URL given by the user
# Output:    the resulting URL on stdout
get_cache_url () {
    url="$(proper_url "$1")"
    git config --global --get-regexp "url\..*\.insteadof" | {
        while read -r real other; do
            # An empty prefix would match every URL; ignore such entries.
            [ -n "$other" ] || continue

            # Key has the form "url.<base>.insteadof"; keep only <base>.
            realurl="${real#url.}"
            realurl="${realurl%.insteadof}"

            # Compare and strip the prefix literally. Interpolating it into
            # a grep/sed pattern would treat "." as a wildcard and break on
            # URLs containing the sed delimiter.
            case "$url" in
                "$other"*)
                    rest=${url#"$other"}
                    printf '%s\n' "$realurl$rest"
                    return 0
                    ;;
            esac
        done
        printf '%s\n' "$url"
    }
}
# Print $1 with a few known URL prefixes rewritten:
#   git://github...          -> https://github...
#   http://github...         -> https://github...
#   git://openembedded.org   -> git://git.openembedded.org
# Any other URL is printed unchanged.
proper_url () {
    # printf is used instead of echo so option-like or backslash-containing
    # input is passed through verbatim. The dots in the openembedded pattern
    # are escaped so they only match a literal ".".
    printf '%s\n' "$1" |
        sed -e 's,^git://github,https://github,' \
            -e 's,^http://github,https://github,' \
            -e 's,^git://openembedded\.org,git://git.openembedded.org,'
}
# Print a sanitized directory name derived from the last component of URL $1.
basename_url () {
    url="$1"
    # In a single sed pass: turn every ":" into "/", then drop a trailing
    # ".git" together with any slashes that follow it.
    desturl="$(echo "$url" | sed -e 'y,:,/,' -e 's,\.git/*$,,')"
    leaf="$(basename "$desturl")"
    sanitize "$leaf"
}
# Create (if needed) and update the bare cache repository for a URL.
#
# The URL is registered in the cache as a remote named after a short hash of
# the URL, with all of its refs mirrored under refs/remotes/<hash>/.
#
# Arguments: $1 - URL to fetch from
#            $2 - path of the bare cache repository
# Returns:   0 on success, 1 on any failure.
#
# Every step is checked explicitly: this function is called from a `||`
# list, where `set -e` has no effect. A cache directory created by this call
# is removed again on failure, so that a failed first fetch does not leave
# an empty cache behind that later looks usable.
bare_clone () {
    url="$1"; dest="$2"
    remote="$(hash "$url")" || return 1
    [ -n "$remote" ] || return 1

    created=
    if [ ! -d "$dest" ]; then
        mkdir -p "$dest" || return 1
        created=1
        if ! git "--git-dir=$dest" init --quiet --bare; then
            rm -rf -- "${dest:?}"
            return 1
        fi
    fi

    if git "--git-dir=$dest" config "remote.$remote.url" "$(proper_url "$url")" &&
        git "--git-dir=$dest" config "remote.$remote.fetch" "+refs/*:refs/remotes/$remote/*" &&
        git "--git-dir=$dest" config "remote.$remote.tagopt" "--no-tags" &&
        git "--git-dir=$dest" fetch "$remote"; then
        return 0
    fi

    # Only discard a cache this call created; an existing cache may still
    # hold objects from earlier fetches.
    if [ -n "$created" ]; then
        rm -rf -- "${dest:?}"
    fi
    return 1
}
# Create the user's repository at DEST, borrowing objects from the cache.
#
# The new repository lists the cache's object directory in
# .git/objects/info/alternates, fetches its refs from the cache, and is then
# repointed at the original URL with the standard fetch refspec.
#
# Arguments: $1 - upstream URL stored as remote "origin"
#            $2 - URL the cache was populated from (selects the cache remote)
#            $3 - path of the bare cache repository
#            $4 - destination directory (must not exist)
# Exits 1 if DEST already exists; returns non-zero if any step fails.
local_clone () {
    url="$1"; cache_url="$2"; cached="$3"; dest="$4"
    cache_remote="$(hash "$cache_url")"

    if [ -e "$dest" ]; then
        echo >&2 "Error: $dest already exists"
        exit 1
    fi
    mkdir "$dest" || return 1

    # Work in a subshell so the caller's working directory is untouched, and
    # chain the steps explicitly so a failure stops the sequence even when
    # `set -e` is not in effect.
    (
        cd "$dest" &&
            git init --quiet &&
            mkdir -p .git/objects/info &&
            echo "$cached/objects" > .git/objects/info/alternates &&
            git config remote.origin.url "$cached" &&
            git config remote.origin.fetch "+refs/remotes/$cache_remote/tags/*:refs/tags/*" &&
            git config --add remote.origin.fetch "+refs/remotes/$cache_remote/heads/*:refs/remotes/origin/*" &&
            git fetch origin &&
            git config remote.origin.url "$url" &&
            git config --replace-all remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" ||
            exit 1

        # Prefer "master" as before, but fall back to "main" for
        # repositories that have no master branch.
        branch=
        for candidate in master main; do
            if git show-ref --verify --quiet "refs/remotes/origin/$candidate"; then
                branch="$candidate"
                break
            fi
        done
        if [ -z "$branch" ]; then
            echo >&2 "Error: neither origin/master nor origin/main exists in $dest"
            exit 1
        fi

        # Use the real subcommand: "co" only works with a user-defined alias.
        git checkout -q -b "$branch" "origin/$branch"
    )
}
# Print the cache repository path for URL $1, then create or update it.
#
# The path goes to stdout so the caller can capture it; everything the
# clone step prints is sent to stderr. Returns 1 if the clone step fails.
setup_cache () {
    url="$1"
    cached="${cachedir}/$(basename_url "${url}").git"
    echo "${cached}"
    if ! bare_clone "${url}" "${cached}" 1>&2; then
        return 1
    fi
}
# Print the first 7 hex digits of the SHA-1 of the arguments (joined by
# spaces and followed by a newline).
#
# Uses sha1sum when available and falls back to `shasum -a 1`, which
# produces the same digest, for systems that do not ship sha1sum.
# Returns 1 with a message on stderr if neither tool is installed.
hash () {
    if command -v sha1sum >/dev/null 2>&1; then
        digest="$(printf '%s\n' "$*" | sha1sum)"
    elif command -v shasum >/dev/null 2>&1; then
        digest="$(printf '%s\n' "$*" | shasum -a 1)"
    else
        echo >&2 "Error: neither sha1sum nor shasum is available"
        return 1
    fi
    printf '%s\n' "$digest" | cut -c1-7
}
# Print the arguments lower-cased, with spaces and the punctuation
# characters []()&~@#%^*_+=;:,$/ each replaced by an underscore.
sanitize () {
    echo "$@" \
        | tr "[:upper:]" "[:lower:]" \
        | sed 's|[][()&~@#%^*_+=;:,$/ ]|_|g'
}
# Run the script, forwarding the command-line arguments unchanged.
main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment