Skip to content

Instantly share code, notes, and snippets.

@firexcy
Last active November 10, 2022 09:02
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save firexcy/30fbfbea49d9e0b5f8bc13f8a1ead19c to your computer and use it in GitHub Desktop.
Save firexcy/30fbfbea49d9e0b5f8bc13f8a1ead19c to your computer and use it in GitHub Desktop.
#!/usr/bin/env bash
# Aliases are not expanded in non-interactive shells, so a script-level
# `alias yt-dlp=...` is never applied. A wrapper function is used instead.
# Set YT_DLP_BIN (or edit the default below) to the output of
# `command -v yt-dlp`; when that file is not executable, the yt-dlp found on
# PATH is used.
yt-dlp() {
  local bin="${YT_DLP_BIN:-/opt/homebrew/bin/yt-dlp}"
  if [[ -x "$bin" ]]; then
    "$bin" "$@"
  else
    # `command` bypasses this function and resolves the binary via PATH.
    command yt-dlp "$@"
  fi
}
#######################################
# Print the command-line help text and terminate the script.
# Globals:   BASH_SOURCE (read) - its base name is shown in the usage line
# Outputs:   help text on stdout
# Returns:   does not return; exits with the status of the print command
#######################################
usage() {
  local script_name
  script_name=$(basename "${BASH_SOURCE[0]}")
  printf '%s\n' \
    "Usage: ${script_name} [-t] [-l LANGUAGE_TAG] URL" \
    'A simple script to download YouTube subtitles with yt-dlp, and optionally' \
    'convert to plain text.' \
    'Available options:' \
    '-h, --help Print this help and exit.' \
    '-l, --lang Filter by language tag as defined in RFC 5646, only one tag is' \
    'permitted.' \
    '-t, --text Output a plain text file along with the subtitles file.'
  exit
}
#######################################
# Parse command-line options and the single positional URL.
# Globals:
#   text       (written) - 1 when -t/--text was given, otherwise 0
#   extra_lang (written) - value of -l/--lang, empty when the option is absent
#   args       (written) - array holding the remaining positional arguments
# Arguments: the script's command line ("$@")
# Outputs:   error messages on stderr
# Returns:   0 on success; exits 1 on an unknown option, on -l/--lang without
#            a value, or when the number of positional arguments is not one;
#            -h/--help calls usage
#######################################
get_params() {
  text=0
  # Reset so a stale or inherited value never acts as an implicit filter.
  extra_lang=""
  while :; do
    case "${1-}" in
      -h | --help) usage ;;
      -t | --text) text=1 ;;
      -l | --lang)
        if (( $# < 2 )); then
          echo "Option $1 requires a language tag" >&2
          exit 1
        fi
        extra_lang="$2"
        shift
        ;;
      -?*)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
      *) break ;;
    esac
    shift
  done
  args=("$@")
  if [[ ${#args[@]} -ne 1 ]]; then
    echo "Must provide one and only one URL" >&2
    exit 1
  fi
  return 0
}
#######################################
# List the vtt subtitle tracks of a video and let the user pick one.
# Rows of the yt-dlp listing that contain " from " are labelled [autocaps];
# all other rows are labelled [subtitles].
# Globals:
#   extra_lang (read)    - language-tag prefix to filter by; when empty the
#                          menu falls back to zh-Hans and en tracks
#   url        (written) - the URL passed as $1
#   code       (written) - language code of the chosen track
#   cmd        (written) - --write-auto-subs for an [autocaps] entry,
#                          --write-subs for a [subtitles] entry
# Arguments: $1 - video URL handed to `yt-dlp --list-subs`
# Outputs:   a status line on stdout; the menu comes from `select`; errors go
#            to stderr
# Returns:   0 once a track was chosen; exits 1 when no track matches or the
#            input ends before a valid choice is made
#######################################
list_subs() {
  url="$1"
  code=""
  cmd=""
  local listing kind prefix row choice
  local -a prefixes=() from_filter=() opts=()

  # Keep only the rows that offer a vtt format.
  listing=$(yt-dlp --list-subs "$url" | grep "vtt")

  if [[ -z "${extra_lang-}" ]]; then
    echo "No designated language; falls back to English and Chinese"
    prefixes=("zh-Hans" "en")
  else
    echo "Filtering by:" "$extra_lang"
    prefixes=("$extra_lang")
  fi

  # Build the menu entry by entry so that a category without matches adds
  # nothing (instead of a blank, selectable placeholder line).
  for kind in autocaps subtitles; do
    if [[ "$kind" == "autocaps" ]]; then
      from_filter=(grep " from ")
    else
      from_filter=(grep -v " from ")
    fi
    for prefix in "${prefixes[@]}"; do
      while IFS= read -r row; do
        opts+=("$row")
      done < <(
        printf '%s\n' "$listing" \
          | "${from_filter[@]}" \
          | grep -e "^${prefix}" \
          | awk -v tag="[${kind}]" '{print tag, $1, $2}'
      )
    done
  done

  if [[ ${#opts[@]} -eq 0 ]]; then
    echo "No matching vtt subtitles found for: $url" >&2
    exit 1
  fi

  select choice in "${opts[@]}"; do
    # `select` leaves the variable empty for input that is not a listed number.
    if [[ -z "$choice" ]]; then
      echo "Invalid selection, please enter one of the listed numbers" >&2
      continue
    fi
    # Entries look like "[kind] CODE NAME"; the second field is the code.
    read -r _ code _ <<<"$choice"
    if [[ "$choice" == "[autocaps]"* ]]; then
      cmd="--write-auto-subs"
    else
      cmd="--write-subs"
    fi
    break
  done

  # Reached with an empty code when the input ended before a valid choice.
  if [[ -z "$code" ]]; then
    echo "No subtitle track selected" >&2
    exit 1
  fi
}
#######################################
# Download one subtitle track without fetching the video itself.
# Arguments:
#   $1 - yt-dlp subtitle switch, passed through as the first argument
#   $2 - language code given to --sub-langs
#   $3 - video URL
# Returns:   the exit status of yt-dlp
#######################################
dl_subs() {
  local write_flag="$1"
  local lang_code="$2"
  local video_url="$3"
  local -a yt_args=(
    "$write_flag"
    --skip-download
    --sub-langs "$lang_code"
    --output subs
    "$video_url"
  )
  yt-dlp "${yt_args[@]}"
}
#######################################
# Convert a vtt subtitle file to plain text.
# Drops the first three lines, cue timing lines (" --> "), and empty lines;
# strips inline <hh:mm:ss.mmm>, <c> and </c> tags; collapses consecutive
# duplicate lines.
# Arguments: $1 - path to the .vtt file
# Outputs:   the plain text on stdout
# Returns:   the exit status of the last pipeline stage (uniq)
#######################################
vtt_to_txt() {
  # `tail -n +4` is the POSIX spelling; the bare `tail +4` form is obsolete
  # and not accepted by every tail implementation.
  tail -n +4 "$1" \
    | grep -v " --> " \
    | grep . \
    | sed -E 's!<([0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{3}|c|/c)>!!g' \
    | uniq
}
# Entry point: parse the command line, let the user choose a track, download
# it, and optionally derive a plain-text transcript from the downloaded file.
get_params "$@"
list_subs "${args[0]}"
# Stop with the download's status on failure so no empty transcript is written.
dl_subs "$cmd" "$code" "$url" || exit
if [[ "$text" -eq 1 ]]; then
  vtt_file="subs.$code.vtt"
  if [[ ! -f "$vtt_file" ]]; then
    echo "Subtitle file not found: $vtt_file" >&2
    exit 1
  fi
  vtt_to_txt "$vtt_file" > "text.$code.txt"
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment