#!/bin/sh
# This script scrapes Crunchyroll to get an m3u8 and caption file.
# It then uses ffmpeg to download + mux it all into a single mp4.
# It uses awk for EVERYTHING. Sorry? I really wanted to use Python or
# something, but that would turn this into just a Python thing, or a Node
# thing, or whatever. It was painful, though, because I hate awk.
# Check that argv has enough parameters
if [ $# -lt 2 ]; then
    echo "Usage: $0 <crunchyroll-episode-url> <output.mp4>"
    echo "The URL is the one you use to watch the episode in your browser."
    exit 1
fi
# Let's proceed, using $1 as the URL.
# IF THIS WORKS, $urls will be two https URLs glued together: the m3u8 first,
# then the English (US) caption file.
urls=$(wget -qO- "$1" | awk '{
    # Isolate the JSON blob that follows ".media = ".
    split($0, a, ".media = ");
    # Pull out the Japanese-audio, no-hardsub HLS stream URL.
    split(a[2], b, "hls\",\"audio_lang\":\"jaJP\",\"hardsub_lang\":null,\"url\":\"");
    split(b[2], c, "\",\"res");
    gsub(/\\/, "", c[1]);
    # Pull out the English (US) subtitle URL.
    split($0, d, "\"language\":\"enUS\",\"url\":\"");
    split(d[2], e, "\",\"title\":\"En");
    gsub(/\\/, "", e[1]);
    # Print both back to back; never pass data as a printf format string.
    printf "%s%s", c[1], e[1]
}')
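# For reference, the awk above keys off JSON embedded in the page after
# ".media = ". A hedged sketch of the two fragments it matches, reconstructed
# purely from the split() separators above (the surrounding structure and
# field order are assumptions and may change on Crunchyroll's side):
#   ...hls","audio_lang":"jaJP","hardsub_lang":null,"url":"https:\/\/...m3u8","res...
#   ..."language":"enUS","url":"https:\/\/...","title":"En...
# The gsub() calls strip the backslashes that escape the slashes in the URLs.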
# If the awk above fails, $urls will probably just be a line feed, so check
# its length anyway.
if [ ${#urls} -lt 2 ]; then
    echo "Oops, the awk didn't work."
    echo "This could be because:"
    printf "\t* The video didn't have BOTH a Japanese dub/no-sub stream AND English (US) subtitles\n"
    printf "\t* The awk just plain sucks and failed to parse the page\n"
    printf "\t* Crunchyroll changed something on their page\n"
    printf "\t* You didn't link the right page\n"
    printf "\t* You don't have awk or wget installed for some reason (wget is more lightweight than curl)\n"
    printf "\t* Or, the page just failed to load (try diagnosing this with wget)\n"
    exit 1
fi
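# To diagnose by hand, fetch the page yourself and check whether the markers
# the awk relies on are present. Example commands (a rough sketch; the URL is
# whatever you passed as $1):
#   wget -qO- "$1" | grep -c 'media = '
#   wget -qO- "$1" | grep -c '"language":"enUS"'
# A count of 0 for either means the parse above will fail.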
# $urls is valid at this point, so let's separate the URLs...
# $url1 will be the m3u8, and $url2 will be the captions.
# We're using awk to separate these as well because I'm stupid.
url1=$(echo "$urls" | awk '{split($0, a, "https://"); printf "%s", "https://" a[2]}')
url2=$(echo "$urls" | awk '{split($0, a, "https://"); printf "%s", "https://" a[3]}')
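# Worked example (hypothetical URLs): if
#   urls="https://v.example/stream.m3u8https://c.example/subs.ass"
# then splitting on "https://" leaves a[2]="v.example/stream.m3u8" and
# a[3]="c.example/subs.ass", and url1/url2 get the "https://" prefix re-attached.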
# We don't need $urls anymore, so unset it...
unset urls
# $2 is the output MP4.
# Let's print some bold text informing you that it's running ffmpeg now.
printf "\n\033[1mGot URLs, running the underlying ffmpeg now!\033[0m\n\n"
# Okay, now just run ffmpeg and die: -c copy stream-copies the audio/video
# from the m3u8, and -c:s mov_text converts the captions to MP4's mov_text format.
ffmpeg -i "$url1" -i "$url2" -c copy -c:s mov_text "$2"
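# Example run (hypothetical script name, URL, and output file):
#   ./crunchyroll-dl.sh "https://www.crunchyroll.com/<series>/<episode>" episode.mp4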