Convert tarballs from opensource.apple.com to Git Repositories
#!/bin/bash
#
# Downloads the source tarballs linked from opensource.apple.com and replays
# each package's tarballs as successive commits in a per-package Git repository.
#
# Configuration. Everything is exported because the worker functions are run
# in `bash -c` child shells and read these values from the environment:
#   TARGET_URL        site that is crawled for tarball links
#   TARGET_REPO       base URL used for each repository's "origin" remote
#   TARGET_EXTENSION  archive suffix that identifies a tarball
#   WORKING_DIR       directory containing this script; all output goes below it
#   COMMIT_AUTHOR     user.name configured in the generated repositories
#   USE_THREADS       number of packages converted in parallel
export TARGET_URL="https://opensource.apple.com/"
export TARGET_REPO="https://github.com/apple"
export TARGET_EXTENSION=".tar.gz"

# Resolved separately from `export` so that a failed lookup is not masked by
# export's own exit status. An empty WORKING_DIR must never reach the
# `rm -rf "$WORKING_DIR/..."` calls further down.
WORKING_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" || {
  printf 'error: cannot determine the directory containing this script\n' >&2
  exit 1
}
export WORKING_DIR

export COMMIT_AUTHOR="repogen"
export USE_THREADS=8
#######################################
# Crawls TARGET_URL for release pages, extracts every tarball link from them
# and downloads the tarballs.
# Why scrape pages at all? Because Apple's official tarballs directory is
# incomplete.
# Globals:   WORKING_DIR, TARGET_URL, TARGET_EXTENSION (read)
# Arguments: none
# Outputs:   $WORKING_DIR/.tmp/filelist.txt (one URL per line); the downloads
#            themselves are placed by `wget -x` relative to $WORKING_DIR.
#            Leaves the shell's working directory at $WORKING_DIR.
# Returns:   non-zero if the scratch directory cannot be created or entered,
#            if no tarball link was found, or if the download step failed.
#######################################
function fetchRawTarballs {
  local scratch_dir="$WORKING_DIR/.tmp"
  local file_list="$scratch_dir/filelist.txt"

  # Stop if the scratch directory is unavailable: the crawl and the `rm`
  # below would otherwise run in whatever directory the caller was in.
  mkdir -- "$scratch_dir" || return 1
  cd -- "$scratch_dir" || return 1

  # Best-effort crawl: wget's exit status is intentionally not checked, a
  # failed crawl simply leaves fewer pages to scan.
  wget --accept-regex '\bmacos\b' -r -l0 -H -t1 -nd -N -np -A.html -erobots=off "$TARGET_URL"
  wget --accept-regex '\bmac\b' -r -l0 -H -t1 -nd -N -np -A.html -erobots=off "$TARGET_URL"

  # Index and introduction pages are excluded from link extraction.
  rm -f -- index*.html introduction*.html
  cd -- "$WORKING_DIR" || return 1

  # Pull the href target out of every line mentioning the archive suffix
  # (matched literally, case-insensitively), de-duplicate, and turn the
  # site-relative paths into absolute URLs.
  # NOTE(review): `head -n -3` (GNU head) discards the last three sorted
  # entries exactly as before; the reason is not documented - confirm.
  grep -hiF -- "$TARGET_EXTENSION" "$scratch_dir"/*.html \
    | sed -n 's/.*href="\([^"]*\).*/\1/p' \
    | sort -u \
    | head -n -3 \
    | sed -e "s|^|$TARGET_URL|" \
    > "$file_list"

  # With an empty list xargs would still start wget once, without any URL.
  if [[ ! -s "$file_list" ]]; then
    printf 'fetchRawTarballs: no tarball links found\n' >&2
    return 1
  fi

  # -x recreates the host/path hierarchy locally, -c resumes partial files.
  tr '\n' '\0' < "$file_list" | xargs -0 wget -x -c
}
#######################################
# Records the contents of one tarball as a single commit in the package's
# repository, then empties the work tree again (only .git is kept) so the
# next tarball starts from a clean directory.
# The tarball is expected to unpack into one top-level directory named after
# the archive without its suffix.
# Globals:   WORKING_DIR, TARGET_EXTENSION (read)
# Arguments: $1 - package name (directory below repo/ and tarballs/)
#            $2 - archive file name, e.g. "foo-1.2.tar.gz"
# Outputs:   tar/git diagnostics; changes the working directory to the repo
# Returns:   0 if a commit was created; non-zero if the arguments, archive or
#            repository are missing, extraction failed, or git did not create
#            a commit.
#######################################
function tarballToCommit {
  local pkg_name="$1"
  # Strip the suffix literally and only at the end of the name.
  local pkg_ver="${2%"$TARGET_EXTENSION"}"
  local repo_dir="$WORKING_DIR/repo/$pkg_name"
  local archive="$WORKING_DIR/opensource.apple.com/tarballs/$pkg_name/$pkg_ver$TARGET_EXTENSION"
  local status=0

  if [[ -z "$pkg_name" || -z "$pkg_ver" ]]; then
    printf 'usage: tarballToCommit PACKAGE ARCHIVE_NAME\n' >&2
    return 1
  fi
  if [[ ! -f "$archive" ]]; then
    printf 'tarballToCommit: no such archive: %s\n' "$archive" >&2
    return 1
  fi

  # Everything below is destructive and relative to the repository, so refuse
  # to continue anywhere else.
  cd -- "$repo_dir" || return 1
  if [[ ! -e .git ]]; then
    printf 'tarballToCommit: not a git repository: %s\n' "$repo_dir" >&2
    return 1
  fi

  if tar -xzf "$archive" && [[ -d "./$pkg_ver" ]]; then
    # Hoist the archive's contents into the repository root. dotglob makes
    # hidden files part of the snapshot; the subshell keeps the shopt local.
    (
      shopt -s dotglob nullglob
      for entry in "./$pkg_ver"/*; do
        # An archive-supplied .git must never replace the repository metadata.
        [[ "${entry##*/}" == ".git" ]] && continue
        mv -- "$entry" .
      done
    )
    rm -rf -- "./$pkg_ver"

    # git commit exits non-zero when the tree is identical to the previous
    # commit; that is reported to the caller through the return status.
    if ! { git add -A && git commit -m "$pkg_ver"; }; then
      status=1
    fi
  else
    printf 'tarballToCommit: cannot unpack %s into a "%s" directory\n' \
      "$archive" "$pkg_ver" >&2
    status=1
  fi

  # Empty the work tree but keep .git. With dotglob the pattern never matches
  # "." or "..", so no reliance on rm refusing to delete them is needed.
  (
    shopt -s dotglob nullglob
    for entry in ./*; do
      [[ "${entry##*/}" == ".git" ]] || rm -rf -- "$entry"
    done
  )

  return "$status"
}
export -f tarballToCommit
#######################################
# Creates (or reuses) the Git repository for one package and commits each of
# the package's tarballs to it, oldest version first.
# Globals:   WORKING_DIR, TARGET_REPO, TARGET_EXTENSION, COMMIT_AUTHOR (read)
# Arguments: $1 - package name: a directory below
#                 $WORKING_DIR/opensource.apple.com/tarballs/
# Outputs:   git diagnostics; changes the working directory to the repo
# Returns:   non-zero if the name is empty or the repository directory cannot
#            be created/entered; otherwise the status of the final
#            `git reset --hard`.
#######################################
function procTarballDirToGitRepo {
  local pkg_name="$1"
  local tarball_dir="$WORKING_DIR/opensource.apple.com/tarballs/$pkg_name"
  local repo_dir="$WORKING_DIR/repo/$pkg_name"
  local origin_url="$TARGET_REPO/$pkg_name"
  local archive_name

  if [[ -z "$pkg_name" ]]; then
    printf 'usage: procTarballDirToGitRepo PACKAGE\n' >&2
    return 1
  fi

  # Make directory for repo; never run git in some other directory.
  mkdir -p -- "$repo_dir" || return 1
  cd -- "$repo_dir" || return 1

  # Setup repo
  git init
  git config gc.auto 0
  git config user.name "$COMMIT_AUTHOR"
  git config user.email "290012fe-not-an-email@93918986-8ed8-4f23-86c8-6fafdb5a5bd7.null"
  # On a re-run the remote already exists; point it at the same URL instead.
  git remote add origin "$origin_url" 2>/dev/null \
    || git remote set-url origin "$origin_url"

  # Start restoring tarballs. Only names carrying the archive suffix are
  # considered. Version sort is used because a plain numeric sort reads
  # "1.10" as the decimal 1.1 and would commit it before "1.2".
  # The package name is passed as a real argument (never spliced into a
  # `bash -c` string), so names with spaces or shell metacharacters are safe.
  while IFS= read -r archive_name; do
    # Subshell: the worker changes directory; stdin is detached so it cannot
    # consume the remaining list.
    ( tarballToCommit "$pkg_name" "$archive_name" ) < /dev/null
  done < <(
    cd -- "$tarball_dir" || exit 1
    shopt -s nullglob
    for archive_name in *"$TARGET_EXTENSION"; do
      printf '%s\n' "$archive_name"
    done | LC_ALL=C sort -V
  )

  # Reset to last commit: the worker leaves the work tree empty.
  git reset --hard
}
export -f procTarballDirToGitRepo
#######################################
# Converts every package directory below the downloaded tarball tree into a
# Git repository, handling several packages in parallel.
# Directory names are used as repository names.
# Globals:   WORKING_DIR, USE_THREADS (read; 1 worker if USE_THREADS is unset)
# Arguments: none
# Returns:   1 if the tarball tree does not exist, otherwise xargs' status.
#######################################
function tarballDirToGitRepo {
  local tarballs_root="$WORKING_DIR/opensource.apple.com/tarballs"

  if [[ ! -d "$tarballs_root" ]]; then
    printf 'tarballDirToGitRepo: no tarball directory: %s\n' "$tarballs_root" >&2
    return 1
  fi

  # Emit the sub-directory names NUL-delimited straight from a glob instead of
  # parsing `ls`; plain files next to the package directories are skipped.
  # Each name reaches the worker as a positional argument of its own
  # `bash -c` shell, which sees the function because it is exported.
  (
    cd -- "$tarballs_root" || exit 1
    shopt -s nullglob
    for pkg_dir in */; do
      printf '%s\0' "${pkg_dir%/}"
    done
  ) | xargs -0 -P "${USE_THREADS:-1}" -I % bash -c 'procTarballDirToGitRepo "$@"' _ %
}
export -f tarballDirToGitRepo
# Entry point: discard any scratch directory left by a previous run, download
# the tarballs, then convert them to repositories.
# ${WORKING_DIR:?} aborts the script instead of letting an empty value turn
# the path into "/.tmp".
rm -rf -- "${WORKING_DIR:?}/.tmp"
fetchRawTarballs
tarballDirToGitRepo
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment