# This script scrapes Crunchyroll to get an m3u8 and caption file.
# It then uses ffmpeg to download + mux it all into a single mp4.
# It uses awk for EVERYTHING. Sorry? I really wanted to use Python or something
# but that would make it into just a Python thing, or Node or whatever.
# This was painful though because I hate awk.
# Arguments:
#   $1 - Crunchyroll episode URL (the page you watch the episode on)
#   $2 - path of the MP4 file to write
# Exit status:
#   1 on a usage error or when the page could not be scraped; otherwise
#   whatever ffmpeg returns, since it is the last command executed.
# Requires wget, awk and ffmpeg on PATH.

# See if argv has enough params. A usage error is a failure, so report it on
# stderr and exit non-zero.
if [ "$#" -lt 2 ]; then
  printf '%s\n' "Usage: $0 [Enter the Crunchyroll episode URL here, the one you use to view it with in the browser] [Put output mp4 file here]" >&2
  exit 1
fi

# Let's proceed using $1 as the URL.
# The awk program looks on every input line for:
#   * the URL of the HLS stream with Japanese audio and no hardsubs, found
#     after the ".media = " marker, and
#   * the URL of the English (US) caption file.
# Backslashes are stripped from both matches (the page embeds them as escaped
# JSON). Only the first match of each is kept. The END rule always prints
# exactly two lines: the m3u8 URL, then the caption URL. Either may be empty.
# The URLs are passed to printf as "%s" arguments, never as the format string,
# so a '%' inside a URL cannot corrupt the output.
urls=$(wget -qO- "$1" | awk '
  {
    split($0, a, ".media = ")
    split(a[2], b, "hls\",\"audio_lang\":\"jaJP\",\"hardsub_lang\":null,\"url\":\"")
    split(b[2], c, "\",\"res")
    gsub(/\\/, "", c[1])

    split($0, d, "\"language\":\"enUS\",\"url\":\"")
    split(d[2], e, "\",\"title\":\"En")
    gsub(/\\/, "", e[1])

    if (stream == "" && c[1] != "") stream = c[1]
    if (subs == "" && e[1] != "") subs = e[1]
  }
  END { printf "%s\n%s\n", stream, subs }
')

# $url1 will be the m3u8, and $url2 will be the captions.
# Command substitution strips trailing newlines but keeps leading ones, so
# line 1 is always the m3u8 slot and line 2 the caption slot, even when one
# of them came back empty.
url1=$(printf '%s\n' "$urls" | awk 'NR == 1')
url2=$(printf '%s\n' "$urls" | awk 'NR == 2')
# We don't need $urls anymore, so unset it...
unset urls

# Both URLs must have been found and must look like https URLs; otherwise
# there is nothing sensible to hand to ffmpeg.
scrape_ok=1
case "$url1" in
  https://*) ;;
  *) scrape_ok=0 ;;
esac
case "$url2" in
  https://*) ;;
  *) scrape_ok=0 ;;
esac

if [ "$scrape_ok" -ne 1 ]; then
  {
    echo "Oops, the awk didn't work."
    echo "This could be because:"
    printf "\t* The video didn't have BOTH Japanese dub/no-sub AND English (US) subtitles\n"
    printf "\t* The awk just plain sucks and failed to parse the page\n"
    printf "\t* Crunchyroll changed something on their page\n"
    printf "\t* You didn't link the right page\n"
    printf "\t* You don't have awk or wget installed for some reason (wget is more lightweight than curl)\n"
    printf "\t* Or, the page just failed to load (try diagnosing this with wget)\n"
  } >&2
  exit 1
fi
unset scrape_ok

# $2 is the output MP4.
# Let's print some bold text informing you that it's running ffmpeg now.
printf "\n\033[1mGot URLs, running the underlying ffmpeg now!\033[0m\n\n"

# Okay, now just run ffmpeg: copy the streams as-is and convert the captions
# to mov_text for the MP4 output. Everything is quoted so URLs containing
# '&', '?' or '*' and output paths containing spaces reach ffmpeg as single
# arguments.
ffmpeg -i "$url1" -i "$url2" -c copy -c:s mov_text "$2"
