Skip to content

Instantly share code, notes, and snippets.

@kwvg
Last active June 23, 2021 14:45
Show Gist options
  • Save kwvg/5bc22eb891dab9c6a65a7ed31a6ca179 to your computer and use it in GitHub Desktop.
Save kwvg/5bc22eb891dab9c6a65a7ed31a6ca179 to your computer and use it in GitHub Desktop.
Convert tarballs from opensource.apple.com to Git Repositories
#!/bin/bash
# Settings shared by all functions in this script. They are exported because
# the import functions are run in child shells (xargs + bash -c) that need
# to see the same values.

# Site that is crawled for release pages and tarball links.
TARGET_URL="https://opensource.apple.com/"
# Base URL used for the "origin" remote of every generated repository.
TARGET_REPO="https://github.com/apple"
# File name suffix that identifies a downloadable release archive.
TARGET_EXTENSION=".tar.gz"
# Absolute path of the directory that contains this script.
WORKING_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)"
# Author name recorded on the generated commits.
COMMIT_AUTHOR="repogen"
# Number of packages that are converted in parallel.
USE_THREADS=8

export TARGET_URL TARGET_REPO TARGET_EXTENSION WORKING_DIR COMMIT_AUTHOR USE_THREADS
# Crawl the release pages of TARGET_URL and download every linked tarball.
#
# Why crawl the pages instead of mirroring the tarballs directory?
# Because Apple's official tarballs directory is incomplete.
#
# Globals:  WORKING_DIR, TARGET_URL, TARGET_EXTENSION (read)
# Outputs:  $WORKING_DIR/.tmp/*.html        crawled release pages
#           $WORKING_DIR/.tmp/filelist.txt  one tarball URL per line
#           the tarballs themselves, downloaded with "wget -x" while the
#           current directory is $WORKING_DIR
# Note:     leaves the caller's current directory set to $WORKING_DIR.
# Returns:  non-zero when the scratch directory cannot be created or
#           entered, otherwise the status of the final download pipeline.
function fetchRawTarballs {
  local tmp_dir="$WORKING_DIR/.tmp"
  local file_list="$tmp_dir/filelist.txt"

  # Never crawl (or delete index*.html) in whatever directory the caller
  # happened to be in: a pre-existing .tmp is fine, a failed cd is fatal.
  mkdir -p "$tmp_dir" || return 1
  cd "$tmp_dir" || return 1

  wget --accept-regex '\bmacos\b' -r -l0 -H -t1 -nd -N -np -A.html -erobots=off "$TARGET_URL"
  wget --accept-regex '\bmac\b' -r -l0 -H -t1 -nd -N -np -A.html -erobots=off "$TARGET_URL"

  # Landing pages are not release listings; -f keeps this quiet when a
  # pattern matches nothing.
  rm -f -- index*.html introduction*.html

  cd "$WORKING_DIR" || return 1

  # Pull the href of every line that mentions the archive extension (matched
  # literally, case-insensitively), de-duplicate, and prefix the site URL.
  # NOTE(review): "ghead -n -3" drops the last three sorted entries,
  # presumably links that are not release tarballs; confirm.
  cat -- "$tmp_dir"/*.html \
    | grep -iF -- "$TARGET_EXTENSION" \
    | gsed -n 's/.*href="\([^"]*\).*/\1/p' \
    | gsort \
    | guniq \
    | ghead -n -3 \
    | gsed -e "s|^|$TARGET_URL|g" > "$file_list"

  # -x recreates the remote directory layout locally, -c resumes partial files.
  tr '\n' '\0' < "$file_list" | xargs -0 wget -x -c
}
# Import one release tarball into its package repository as a single commit.
#
# Globals:   WORKING_DIR, TARGET_EXTENSION (read)
# Arguments: $1 - package name; selects tarballs/$1 and repo/$1
#            $2 - tarball file name inside tarballs/$1
# The tarball name with TARGET_EXTENSION stripped is used as the commit
# message and as the name of the top-level directory expected inside the
# archive. When the function finishes, the work tree holds nothing but .git,
# so the next tarball is unpacked into an empty tree.
# Returns:   non-zero when arguments are missing, the repository directory
#            cannot be entered, or the archive cannot be extracted.
function tarballToCommit {
  local pkg_name="$1"
  # Literal suffix strip (the quoted pattern is not treated as a glob/regex).
  local pkg_ver="${2%"$TARGET_EXTENSION"}"
  local repo_dir="$WORKING_DIR/repo/$pkg_name"
  local tarball="$WORKING_DIR/opensource.apple.com/tarballs/$pkg_name/$pkg_ver.tar.gz"
  local status=0

  # Empty values would turn the cleanup below into a delete of the wrong tree.
  if [[ -z "$WORKING_DIR" || -z "$pkg_name" || -z "$pkg_ver" ]]; then
    echo "tarballToCommit: WORKING_DIR, package name and tarball name are required" >&2
    return 1
  fi

  # tar and git act on the current directory, so a failed cd must stop here.
  cd "$repo_dir" || return 1

  if tar zxf "$tarball"; then
    # Hoist the archive's top-level entries into the repository root.
    # find (unlike a "*" glob) also picks up dotfiles; an embedded .git is
    # skipped so it can never clash with the repository's own metadata.
    find "$repo_dir/$pkg_ver" -mindepth 1 -maxdepth 1 ! -name .git \
      -exec mv -- {} "$repo_dir/" \;
    rm -rf -- "$repo_dir/$pkg_ver"

    # Make commit
    git add -A && git commit -a -m "$pkg_ver"
  else
    echo "tarballToCommit: could not extract $tarball" >&2
    status=1
  fi

  # Empty the work tree but keep .git in place; no backup/restore of .git
  # and no reliance on rm refusing to delete "." and "..".
  find "$repo_dir" -mindepth 1 -maxdepth 1 ! -name .git -exec rm -rf -- {} +

  return "$status"
}
export -f tarballToCommit
# Build the Git repository for one package from its directory of tarballs.
#
# Globals:   WORKING_DIR, COMMIT_AUTHOR, TARGET_REPO (read)
# Arguments: $1 - package name; tarballs are read from
#                 $WORKING_DIR/opensource.apple.com/tarballs/$1 and the
#                 repository is created in $WORKING_DIR/repo/$1
# Returns:   non-zero when the repository directory cannot be created or
#            entered, otherwise the status of the final "git reset".
function procTarballDirToGitRepo {
  local pkg_name="$1"
  local repo_dir="$WORKING_DIR/repo/$pkg_name"
  local tarballs_dir="$WORKING_DIR/opensource.apple.com/tarballs/$pkg_name"
  local tarball

  # Make directory for repo; git must never be initialised anywhere else.
  mkdir -p "$repo_dir" || return 1
  cd "$repo_dir" || return 1

  # Setup repo
  git init
  git config gc.auto 0
  git config user.name "$COMMIT_AUTHOR"
  git config user.email "290012fe-not-an-email@93918986-8ed8-4f23-86c8-6fafdb5a5bd7.null"
  git remote add origin "$TARGET_REPO/$pkg_name"

  # Replay the tarballs ordered numerically by the text after the first "-".
  # Package and file names are handed over as real arguments (not pasted
  # into a command string), so spaces or shell metacharacters in them cannot
  # split or inject commands. Each import runs in a subshell so its cd and
  # variables do not leak into this function, with stdin detached so it
  # cannot swallow the remaining file names.
  while IFS= read -r tarball; do
    (tarballToCommit "$pkg_name" "$tarball") < /dev/null
  done < <(ls -1 -- "$tarballs_dir" | gsort -t'-' -nk2)

  # Reset to last commit so the work tree shows the newest imported version.
  git reset --hard
}
export -f procTarballDirToGitRepo
# Convert every package under the downloaded tarballs directory into a Git
# repository, several packages at a time.
#
# Globals:  WORKING_DIR (read), USE_THREADS (read; falls back to 1 when unset)
# Each entry name in $WORKING_DIR/opensource.apple.com/tarballs/ is used as
# a package/repository name and handed to procTarballDirToGitRepo in its own
# bash process, which is why that function has to be exported.
# Returns:  the exit status of xargs.
function tarballDirToGitRepo {
  local tarballs_dir="$WORKING_DIR/opensource.apple.com/tarballs/"

  # Options go before the operand: an ls that does not reorder its
  # arguments would otherwise treat "-1" as a file name.
  ls -1 -- "$tarballs_dir" \
    | tr '\n' '\0' \
    | xargs -0 -P "${USE_THREADS:-1}" -I % bash -c 'procTarballDirToGitRepo "$@"' _ %
}
export -f tarballDirToGitRepo
# Script entry point: discard the scratch directory left by an earlier run,
# then download the tarballs. The conversion into Git repositories is
# disabled; uncomment the call below to run it after the download.
main() {
  rm -rf "$WORKING_DIR/.tmp"
  fetchRawTarballs
  # tarballDirToGitRepo
}

main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment