Skip to content

Instantly share code, notes, and snippets.

@PaperNick
Last active August 3, 2021 10:05
Show Gist options
  • Save PaperNick/643878fa31835bafef9d022ee5a5177f to your computer and use it in GitHub Desktop.
Save PaperNick/643878fa31835bafef9d022ee5a5177f to your computer and use it in GitHub Desktop.
Download https://musictheory.pugetsound.edu/mt21c for offline usage
#!/bin/bash
# Function taken from https://gist.github.com/cdown/1163649
# If the function below does not work in your shell, install the "urlencode" command from the "gridsite-clients" package instead
#######################################
# Percent-encode a string so it can be used inside a URL.
# Usage: urlencode <string>
# Arguments: $1 - the string to encode
# Outputs:   the encoded string on stdout, without a trailing newline.
#            Letters, digits and ". ~ _ -" are kept; every other byte
#            becomes %XX (upper-case hex).
#######################################
urlencode() {
  # Switch to the C locale for the duration of the call only:
  #  - the string is then walked byte by byte, so a multi-byte UTF-8
  #    character is emitted as its individual bytes (e.g. %C3%A9);
  #  - the [a-z]/[A-Z] ranges below match ASCII letters only.
  # Because the variable is local, the caller's locale settings are untouched.
  local LC_ALL=C
  local length="${#1}"
  local i c code
  for (( i = 0; i < length; i++ )); do
    c="${1:i:1}"
    case "$c" in
      [a-zA-Z0-9.~_-]) printf '%s' "$c" ;;
      *)
        # A leading quote makes printf yield the numeric value of the
        # character. Keep only the low byte, defensively, in case the C
        # library reports a high byte as a wider value.
        printf -v code '%d' "'$c"
        printf '%%%02X' "$(( code & 0xFF ))"
        ;;
    esac
  done
}
# The folder in which wget is going to store the downloaded website
WEBSITE_LOCAL_FOLDER_NAME='musictheory.pugetsound.edu/mt21c'
# User agent string sent with the mirroring requests
USER_AGENT_STRING='Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0'
# Mirror the whole book: follow links recursively (at most 5 levels deep, never
# above the start directory), fetch everything each page needs to render, and
# rewrite the links between pages so that they point to the local files
wget \
  --recursive \
  --page-requisites \
  --convert-links \
  --level=5 \
  --no-parent \
  --user-agent="$USER_AGENT_STRING" \
  https://musictheory.pugetsound.edu/mt21c/MusicTheory.html
# Page whose <script> and <link> tags serve as the reference list of external assets
asset_reference_page="$WEBSITE_LOCAL_FOLDER_NAME/Accidentals.html"
# Without that page both asset lists would silently end up empty and the rest of
# the script would do nothing useful, so stop here with a clear message instead
if [[ ! -f "$asset_reference_page" ]]; then
  printf 'Error: "%s" is missing - the website download seems to have failed.\n' "$asset_reference_page" >&2
  exit 1
fi
# Extract all JS URLs which the page depends on (one per line), except MathJax lib which cannot run locally
js_assets="$(grep -oP '<script src="\K(.*?)(?=\")' "$asset_reference_page" | grep -v 'MathJax\.js')"
# Extract all CSS URLs which the page depends on (one per line), except Google fonts
css_assets="$(grep -oP '<link href="\K(.*?)(?=\")' "$asset_reference_page" | grep -v 'fonts\.google')"
#######################################
# Download every URL of a list into a directory, resuming partial files.
# Each URL is handed to wget as a single argument; it is never turned into
# shell code, so characters such as quotes, "$" or backticks inside a URL
# scraped from the page cannot be executed.
# Arguments:
#   $1 - destination directory (created when missing)
#   $2 - newline-separated list of URLs; blank lines are ignored
# Returns: non-zero when the destination directory cannot be created/entered
#######################################
download_assets() {
  local target_dir=$1 url_list=$2 url
  # Subshell: the directory change must not leak into the main script
  (
    mkdir -p -- "$target_dir" || exit 1
    cd -- "$target_dir" || exit 1
    while IFS= read -r url; do
      [[ -n "$url" ]] || continue
      wget -c -- "$url"
    done <<< "$url_list"
  )
}
# Download the JS assets, e.g. https://pretextbook.org/js/lib/jquery.min.js
download_assets "$WEBSITE_LOCAL_FOLDER_NAME/js" "$js_assets"
# Download the CSS files, e.g. https://pretextbook.org/css/0.31/pretext.css
download_assets "$WEBSITE_LOCAL_FOLDER_NAME/css" "$css_assets"
#######################################
# Escape a literal string so it can be used as the pattern of a
# sed 's|pattern|replacement|' command (basic regular expression syntax).
# Arguments: $1 - the literal text
# Outputs:   the escaped text on stdout
#######################################
escape_sed_pattern() {
  # Backslash-escape the BRE metacharacters and the "|" delimiter
  printf '%s' "$1" | sed -e 's/[][\\.*^$|]/\\&/g'
}

#######################################
# Replace every occurrence of the given asset URLs in the downloaded pages
# with a relative path to the local copy of the asset.
# Globals:   WEBSITE_LOCAL_FOLDER_NAME (read)
# Arguments:
#   $1 - folder, relative to the pages, which holds the downloaded assets
#   $2 - newline-separated list of asset URLs; blank lines are ignored
#######################################
relativize_asset_urls() {
  local asset_dir=$1 url_list=$2
  local url file_name pattern
  local -a sed_script=() pages=()
  # Read line by line: no word splitting, globbing or backslash
  # interpretation is applied to the URLs
  while IFS= read -r url; do
    [[ -n "$url" ]] || continue
    file_name="${url##*/}"
    pattern="$(escape_sed_pattern "$url")"
    # The encoded file name consists of [A-Za-z0-9.~_%-] only, so it is safe
    # to use as sed replacement text without further escaping
    sed_script+=(-e "s|${pattern}|./${asset_dir}/$(urlencode "$file_name")|g")
  done <<< "$url_list"
  (( ${#sed_script[@]} > 0 )) || return 0
  pages=("$WEBSITE_LOCAL_FOLDER_NAME"/*.html)
  # An unmatched glob stays literal; there is nothing to rewrite in that case
  [[ -e "${pages[0]}" ]] || return 0
  # A single sed run applies all substitutions to all pages
  sed -i "${sed_script[@]}" -- "${pages[@]}"
}
echo 'Coverting the JS asset URLs into relative file paths...'
relativize_asset_urls js "$js_assets"
echo 'Coverting the CSS asset URLs into relative file paths...'
relativize_asset_urls css "$css_assets"
echo 'Done!'
echo 'You can now open the MusicTheory.html file to begin studying.'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment