-
-
Save firexcy/30fbfbea49d9e0b5f8bc13f8a1ead19c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Replace the default below with your output of `command -v yt-dlp`, or set
# YT_DLP_BIN in the environment before running the script.
YT_DLP_BIN="${YT_DLP_BIN:-/opt/homebrew/bin/yt-dlp}"

# Route every `yt-dlp` call in this script to the configured binary.
# A shell function is used instead of `alias` because bash does not expand
# aliases in non-interactive scripts unless `expand_aliases` is enabled.
# When the configured path is not executable, fall back to the `yt-dlp`
# found on PATH (`command` bypasses this function, so there is no recursion).
yt-dlp() {
  if [[ -x "$YT_DLP_BIN" ]]; then
    "$YT_DLP_BIN" "$@"
  else
    command yt-dlp "$@"
  fi
}
# Print the help text to stdout, then terminate the script.
# The exit status is that of the preceding `cat` (0 when the text was written).
usage() {
  cat <<EOF
Usage: ${BASH_SOURCE[0]##*/} [-t] [-l LANGUAGE_TAG] URL

A simple script to download YouTube subtitles with yt-dlp, and optionally
convert to plain text.

Available options:

  -h, --help  Print this help and exit.
  -l, --lang  Filter by language tag as defined in RFC 5646, only one tag is
              permitted.
  -t, --text  Output a plain text file along with the subtitles file.
EOF
  exit
}
# Parse the command line.
# Arguments: the script's positional parameters ("$@").
# Globals written:
#   text        1 when -t/--text was given, otherwise 0
#   extra_lang  value passed to -l/--lang, otherwise empty
#   args        array holding the single remaining positional argument (URL)
# Exits with status 1 (message on stderr) on an unknown option, on -l/--lang
# without a value, or when the number of remaining arguments is not exactly
# one. -h/--help hands over to usage.
get_params() {
  text=0
  # Reset explicitly so a value inherited from the environment or from a
  # previous call cannot act as a language filter.
  extra_lang=""

  while :; do
    case "${1-}" in
      -h | --help) usage ;;
      -t | --text) text=1 ;;
      -l | --lang)
        if [[ $# -lt 2 ]]; then
          echo "Option $1 requires a language tag" >&2
          exit 1
        fi
        extra_lang="$2"
        shift
        ;;
      # Conventional end-of-options marker: everything after it is positional.
      --)
        shift
        break
        ;;
      -?*)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
      *) break ;;
    esac
    shift
  done

  args=("$@")
  if [[ ${#args[@]} -ne 1 ]]; then
    echo "Must provide one and only one URL" >&2
    exit 1
  fi
  return 0
}
# Helper for list_subs: print one "<label> <field 1> <field 2>" line for every
# row of a subtitle listing that starts with one of the given prefixes.
#   $1    label placed first on each output line (e.g. "[autocaps]")
#   $2    "auto" keeps only rows containing " from "; any other value keeps
#         only rows that do not contain it
#   $3    the listing text, one track per line
#   $4..  literal language-code prefixes; output follows the order given
_list_subs_filter() {
  local label="$1" kind="$2" rows="$3" prefix
  shift 3
  for prefix in "$@"; do
    printf '%s\n' "$rows" |
      awk -v label="$label" -v kind="$kind" -v prefix="$prefix" '
        index($0, prefix) == 1 {
          has_from = (index($0, " from ") > 0)
          if ((kind == "auto") == has_from) print label, $1, $2
        }'
  done
}

# Ask yt-dlp for the subtitle tracks of a video and let the user pick one.
# Arguments:
#   $1  video URL
# Globals read:
#   extra_lang  language-code prefix to filter by; when empty, the menu is
#               limited to rows starting with "zh-Hans" or "en"
# Globals written:
#   url   copy of $1 (the top-level script passes it on to dl_subs)
#   code  first field of the chosen row (the language code)
#   cmd   "--write-auto-subs" for an [autocaps] entry, else "--write-subs"
# Only rows of `yt-dlp --list-subs` output that mention "vtt" are considered.
# Rows containing " from " are offered as [autocaps], the rest as [subtitles].
# Exits with status 1 when nothing matches or no entry is selected.
list_subs() {
  url="$1"
  local rows entry lang
  local -a prefixes=() opts=()

  rows=$(yt-dlp --list-subs "$url" | grep "vtt")

  if [[ -z "${extra_lang:-}" ]]; then
    echo "No designated language; falls back to English and Chinese"
    prefixes=("zh-Hans" "en")
  else
    echo "Filtering by:" "$extra_lang"
    prefixes=("$extra_lang")
  fi

  # Auto captions first, then regular subtitles; empty lines are dropped so
  # the menu never contains blank entries.
  while IFS= read -r entry; do
    [[ -n "$entry" ]] && opts+=("$entry")
  done < <(
    _list_subs_filter "[autocaps]" auto "$rows" "${prefixes[@]}"
    _list_subs_filter "[subtitles]" manual "$rows" "${prefixes[@]}"
  )

  if [[ ${#opts[@]} -eq 0 ]]; then
    echo "No matching subtitles found" >&2
    exit 1
  fi

  code=""
  cmd=""
  select lang in "${opts[@]}"; do
    # select leaves the variable empty when the reply is not a menu number.
    if [[ -z "$lang" ]]; then
      echo "Invalid selection: $REPLY" >&2
      continue
    fi
    # Entry layout is "<label> <code> <name>"; keep the second word.
    code=${lang#* }
    code=${code%% *}
    if [[ "$lang" == "[autocaps] "* ]]; then
      cmd="--write-auto-subs"
    else
      cmd="--write-subs"
    fi
    break
  done

  # select also ends on end-of-input, leaving nothing chosen.
  if [[ -z "$code" ]]; then
    echo "No subtitle track selected" >&2
    exit 1
  fi
}
# Download one subtitle track without downloading the video.
# Arguments:
#   $1  yt-dlp subtitle flag (the script passes --write-subs or
#       --write-auto-subs)
#   $2  language code handed to --sub-langs
#   $3  video URL
# The output template is "subs" and the format is pinned to vtt, because the
# rest of the script reads the result from "subs.<code>.vtt".
# Returns 2 when an argument is missing or empty, otherwise yt-dlp's status.
dl_subs() {
  if [[ $# -ne 3 || -z "$1" || -z "$2" || -z "$3" ]]; then
    echo "dl_subs: expected a subtitle flag, a language code and a URL" >&2
    return 2
  fi
  yt-dlp "$1" --skip-download --sub-langs "$2" --sub-format vtt --output subs "$3"
}
# Reduce a .vtt subtitles file to plain text on stdout.
# Arguments:
#   $1  path to the .vtt file
# Steps: skip the first three lines (presumably the file header written by
# yt-dlp -- confirm against a real download), drop cue-timing lines
# (those containing " --> ") and empty lines, strip inline <hh:mm:ss.mmm>,
# <c> and </c> tags, then collapse adjacent duplicate lines.
# Returns 1 (message on stderr) when the file cannot be read.
vtt_to_txt() {
  if [[ ! -r "${1-}" ]]; then
    echo "vtt_to_txt: cannot read subtitles file: ${1-}" >&2
    return 1
  fi
  # `-n +4` is the portable spelling; the bare `+4` form is obsolete and some
  # tail implementations treat it as a file name.
  tail -n +4 "$1" |
    grep -v " --> " |
    grep . |
    sed -E 's!<([0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{3}|c|/c)>!!g' |
    uniq
}
# Entry point: parse the command line, let the user pick a subtitle track,
# download it, and optionally write a plain-text transcript.
get_params "$@"
list_subs "${args[0]}"
# Stop if the download failed: there would be no .vtt file to convert.
dl_subs "$cmd" "$code" "$url" || exit
if [[ "$text" -eq 1 ]]; then
  vtt_to_txt "subs.$code.vtt" > "text.$code.txt"
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment