Last active
May 10, 2022 21:44
-
-
Save chapmanjacobd/a755735b3437cd9a8489d826a3d1715d to your computer and use it in GitHub Desktop.
yt-dlp tracking errors example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# requires fish shell, moreutils (combine, sponge), yt-dlp, gallery-dl and bulk-downloader-for-reddit (bdfr)
# usage:
# 1. Create files listing the subreddits you want to archive, one subreddit per line:
#    echo "Wellington" >> ~/mc/WellingtonFolder-reddit.txt
#    or use `reddit-add WellingtonFolder Wellington`
# 2. Run `reddit-links-update WellingtonFolder` to scrape links
# 3. Run `reddit-get-videos WellingtonFolder` to start yt-dlp
#    (or `reddit-get-photos WellingtonFolder` to try gallery-dl first and fall back to yt-dlp)
# Print a single extended-regex line made of every yt-dlp message fragment
# below, joined with "|", ready to be handed to `grep -E (ytUREs)`.
#
# reddit-get-videos / reddit-get-photos test yt's output against this regex
# after the ytREs check; on a match they record the URL and carry on with the
# next one (presumably "URE" = unrecoverable error -- the link will never
# download, so retrying is pointless).
#
# Notes on the patterns:
#   * a trailing "$" anchors the fragment to the end of a line
#   * "ideo.*is private" and "o video ..." drop their first letter on purpose
#     so both upper- and lower-case spellings match
function ytUREs
    set -l dead_link_patterns \
        'repetitive or misleading metadata' \
        'has already been recorded in the archive' \
        'ideo.*is private' \
        'Unable to extract cnn url' \
        'PornHd.*Unable to extract error message' \
        "You don't have permission to access this video." \
        'Video is unavailable pending review' \
        'Video has been flagged for verification' \
        'This video has been disabled$' \
        'The uploader has not made this video available.$' \
        'This video is DRM protected' \
        'This video is protected by a password' \
        'This video requires payment to watch.$' \
        'Unable to download webpage' \
        'dashboard-only post' \
        'This video is only available to Music Premium members' \
        'The policy key provided does not permit this account or video' \
        'live stream recording' \
        'nudity or sexual content' \
        'policy on harassment and bullying' \
        'stream.* is offline$' \
        ': Video unavailable$' \
        'does not exist.$' \
        'has been removed$' \
        'Premieres in.*hours$' \
        'This clip is no longer available$' \
        'No media found$' \
        'No sources found for video' \
        'Video unavailable. This video is not available$' \
        'Resource temporarily unavailable' \
        'This video is unavailable' \
        'Unsupported URL' \
        'URL could be a direct video link' \
        'not a valid URL' \
        'not a video$' \
        "The page doesn't contain any tracks" \
        'removed by the uploader' \
        'blocked it on copyright grounds$' \
        'uploader has closed their.*account' \
        'account has been terminated because we received multiple' \
        'policy on violent or graphic content$' \
        'The channel does not have a .* tab$' \
        'policy on spam, deceptive practices, and scams$' \
        'This video does not exist, or has been deleted.' \
        'Community Guidelines' \
        'Terms of Service' \
        'This channel does not exist' \
        'account associated with this video' \
        "This video doesn't exist.\$" \
        'Track not found$' \
        'Not found.$' \
        "Can't find object media for" \
        'o video formats found' \
        'o video in' \
        'o video on' \
        'certificate is not valid' \
        'CERTIFICATE_VERIFY_FAILED' \
        'HTTP Error 403: Forbidden$' \
        'code -404' \
        'HTTP Error 404' \
        'HTTPError 404' \
        'HTTP Error 410' \
        'HTTPError 410'

    string join '|' -- $dead_link_patterns
end
# Print a single extended-regex line made of every yt-dlp message fragment
# below, joined with "|", ready to be handed to `grep -E (ytREs)`.
#
# reddit-get-videos / reddit-get-photos check this regex first; on a match
# they stop the whole run without recording the URL, so it is attempted again
# next time (presumably "RE" = recoverable error: rate limits, geo blocks,
# login walls, network trouble).
#
# A trailing "$" anchors the fragment to the end of a line.
function ytREs
    set -l retry_later_patterns \
        'due to geo restriction' \
        'HTTP Error 429' \
        'read operation timed out' \
        'Internal Server Error' \
        'Internal error encountered' \
        'Playlists that require authentication may not extract correctly without a successful webpage download' \
        'Main webpage is locked behind the login page' \
        'You need to log in to access this content' \
        'This video is only available for registered users' \
        'Could not send HEAD request' \
        'Unable to download JSON metadata' \
        'Failed to parse JSON Expecting' \
        'expected string or bytes-like object' \
        'in your country' \
        'geolocation' \
        'Connection refused' \
        'giving up after.*retries' \
        'Failed to download MPD manifest:$' \
        'not currently available$' \
        'copyright claim'

    string join '|' -- $retry_later_patterns
end
# Ignored errors
# used for checking if there are new errors to catch
# grep -Ev (ytREs) ~/.jobs/ytdlp_errors.txt | grep -Ev (ytUREs) | grep -Ev (ytIEs)
#
# Prints a single extended-regex line: every fragment below joined with "|".
# These are progress lines, Python traceback noise and messages that need no
# handling; filtering them out of the log (see the command above) leaves only
# messages that none of the three lists knows about yet.
#
# A leading "^" / trailing "$" anchors a fragment to the start / end of a line.
# Each fragment is listed once; repeating an alternative adds nothing to the
# regex.
function ytIEs
    set -l noise_patterns \
        'hidden' \
        'Traceback' \
        'KeyboardInterrupt' \
        'Fatal Python error' \
        'list index out of range' \
        'Extract.* cookies' \
        'File .*, line .*, in' \
        'Requested format is not available.' \
        'fragment_filename_sanitized' \
        'no suitable InfoExtractor for URL' \
        'No such file or directory' \
        ': Downloading webpage$' \
        ': Extracting information$' \
        ': Requesting header$' \
        'Downloading .* metadata$' \
        'Downloading .* information$' \
        'Downloading .* manifest$' \
        'Determining source extension$' \
        'Downloading jwt token$' \
        '^\[info\]' \
        '^\[redirect\]' \
        '^\[Merger\]' \
        '^\[dashsegments\]' \
        'Finished downloading playlist' \
        'The last 30x error message was:' \
        '^Found$' \
        'NoneType' \
        'Creating a generic title instead' \
        'The channel is not currently live' \
        'clips are not currently supported.' \
        'Join this channel to get access to members-only content' \
        'Confirm you are on the latest version using' \
        'referenced before assignment' \
        'field is missing or empty' \
        'Interrupted by user' \
        'unable to open for writing:' \
        'You might want to use a VPN or a proxy server' \
        'maximum recursion depth exceeded' \
        'object does not support item assignment' \
        'encodings are not supported' \
        'object has no attribute' \
        'merged' \
        'Compressed file ended before the end-of-stream marker was reached' \
        'Falling back on generic' \
        'Some formats are possibly damaged' \
        'matching opening tag for closing p tag not found' \
        'the JSON object must be str, bytes or bytearray, not dict' \
        'The read operation timed out' \
        'Unable to recognize playlist.' \
        'Premieres in'

    string join '|' -- $noise_patterns
end
# Download every pending URL of a folder with `yt` and record the outcome.
#
# Arguments:
#   folder  name shared by ~/d/<folder>/unsorted/ (download target),
#           ~/.jobs/reddit/todo/<folder> and ~/.jobs/reddit/done/<folder>
#
# Pending URLs are the lines of the todo file that are not in the done file,
# visited in random order. Everything yt prints is appended to
# ~/.jobs/reddit/done/ytdlp/<folder>. Per URL:
#   * yt succeeded or printed nothing -> URL appended to the done file
#   * output matches (ytREs)           -> stop the whole run immediately; the
#                                         URL stays pending for the next run
#   * output matches (ytUREs)          -> URL appended to the done file
#   * anything else                    -> URL and output are printed so a new
#                                         pattern can be added to a list
# After the loop the todo file is rewritten without the finished URLs.
#
# Returns 5 when no folder is given and 1 when the download directory cannot
# be entered.
function reddit-get-videos --argument folder
    # An empty $folder would make every path below expand to nothing.
    if test -z "$folder"
        echo "usage: reddit-get-videos FOLDER" >&2
        return 5
    end

    set -l todo_file ~/.jobs/reddit/todo/$folder
    # Finished URLs must land here: this is the file `combine` subtracts.
    set -l done_file ~/.jobs/reddit/done/$folder
    set -l ytdlp_log ~/.jobs/reddit/done/ytdlp/$folder

    # Never download into whatever directory the caller happened to be in.
    cd ~/d/$folder/unsorted/; or return 1

    for url in (combine $todo_file not $done_file | sort --unique --ignore-case --random-sort)
        yt $url &| tee -a $ytdlp_log | read -z ytout
        # Grab yt's exit status right away; the next command overwrites $pipestatus.
        set -l yt_status $pipestatus[1]

        if test -z "$ytout" -o $yt_status -eq 0
            # no news is good news
            echo "$url" >>$done_file
        else if echo "$ytout" | grep -qE (ytREs)
            # RE matched
            return
        else if echo "$ytout" | grep -qE (ytUREs)
            # URE matched
            echo "$url" >>$done_file
        else
            echo "$url"
            echo "$ytout"
            # Quoted: a bare ??? is a wildcard in fish and aborts the echo when nothing matches.
            echo 'wtf is this ???'
        end
    end

    # reduce the size of ~/.jobs/reddit/todo/$folder
    combine $todo_file not $done_file | sponge $todo_file
end
# Download every pending URL of a folder, trying gallery-dl first and falling
# back to `yt` when gallery-dl fails.
#
# Arguments:
#   folder  name shared by ~/d/<folder>/unsorted/ (download target),
#           ~/.jobs/reddit/todo/<folder> and ~/.jobs/reddit/done/<folder>
#
# Pending URLs are the lines of the todo file that are not in the done file,
# visited in random order. Per URL:
#   * gallery-dl exits 0, 4 or 8 -> URL appended to the done file
#     (NOTE(review): 4 and 8 look like gallery-dl's "HTTP error" and
#     "not found" codes, i.e. a dead link -- confirm against its docs)
#   * otherwise yt is tried, its output appended to
#     ~/.jobs/reddit/done/ytdlp/<folder>, and then:
#       - yt succeeded or printed nothing -> URL appended to the done file
#       - output matches (ytREs)           -> stop the whole run immediately;
#                                             the URL stays pending
#       - output matches (ytUREs)          -> URL appended to the done file
#       - anything else                    -> URL and output are printed
# After the loop the todo file is rewritten without the finished URLs.
#
# Returns 5 when no folder is given and 1 when the download directory cannot
# be entered.
function reddit-get-photos --argument folder
    # An empty $folder would make every path below expand to nothing.
    if test -z "$folder"
        echo "usage: reddit-get-photos FOLDER" >&2
        return 5
    end

    set -l todo_file ~/.jobs/reddit/todo/$folder
    # Finished URLs must land here: this is the file `combine` subtracts.
    set -l done_file ~/.jobs/reddit/done/$folder
    set -l ytdlp_log ~/.jobs/reddit/done/ytdlp/$folder

    # Never download into whatever directory the caller happened to be in.
    cd ~/d/$folder/unsorted/; or return 1

    for url in (combine $todo_file not $done_file | sort --unique --ignore-case --random-sort)
        gallery-dl --quiet --download-archive $HOME/.local/share/gallerydl.sqlite3 $url
        set -l gallery_status $status

        if test $gallery_status -eq 0 -o $gallery_status -eq 4 -o $gallery_status -eq 8
            echo $url >>$done_file
            continue
        end

        yt $url &| tee -a $ytdlp_log | read -z ytout
        # Grab yt's exit status right away; the next command overwrites $pipestatus.
        set -l yt_status $pipestatus[1]

        if test -z "$ytout" -o $yt_status -eq 0
            # no news is good news
            echo "$url" >>$done_file
        else if echo "$ytout" | grep -qE (ytREs)
            # RE matched
            return
        else if echo "$ytout" | grep -qE (ytUREs)
            # URE matched
            echo "$url" >>$done_file
        else
            echo "$url"
            echo "$ytout"
            # Quoted: a bare ??? is a wildcard in fish and aborts the echo when nothing matches.
            echo 'wtf is this ???'
        end
    end

    # reduce the size of ~/.jobs/reddit/todo/$folder
    combine $todo_file not $done_file | sponge $todo_file
end
# you will need to create the initial done files for each of your folders:
# (the directories are created first: `touch` fails when ~/.jobs/reddit/done
# is missing, and the download functions append yt output to
# ~/.jobs/reddit/done/ytdlp/<folder> and links to ~/.jobs/reddit/todo/<folder>)
mkdir -p ~/.jobs/reddit/todo ~/.jobs/reddit/done/ytdlp
for folder in WellingtonFolder 95_Inspiration 81_New_Music 71_Mealtime_Videos
    touch ~/.jobs/reddit/done/$folder
end
# Add a subreddit to a folder's list file and queue its links.
#
# Arguments:
#   folder  folder name; the list file is ~/mc/<folder>-reddit.txt and
#           ~/d/<folder>/unsorted/ must already exist
#   subr    subreddit name to add
#
# Returns:
#   5  wrong number of arguments
#   1  subr is already in the list file (whole-line, case-insensitive match)
#   2  ~/d/<folder>/unsorted/ does not exist
# Otherwise appends subr to the list file and appends the links printed by
# `reddit-links` (playlist parameters stripped) to ~/.jobs/reddit/todo/<folder>.
function reddit-add --argument folder subr
    if not test (count $argv) -eq 2
        return 5
    end

    # The tilde must stay outside quotes, otherwise fish keeps it as a literal "~".
    set -l curatifile ~/mc/{$folder}-reddit.txt
    set -l dfolder ~/d/$folder/unsorted/

    # -F: the name is compared as plain text, not as a regex.
    # The list file does not exist yet for a brand-new folder.
    if test -f $curatifile; and grep -qFix -- $subr $curatifile
        return 1
    end
    if not test -d $dfolder
        return 2
    end

    echo $subr >>$curatifile
    reddit-links "$subr" | grep '^http' | sed -e 's|&list.*||' -e 's|\?list.*||' >>~/.jobs/reddit/todo/$folder
end
# Run bulk-downloader-for-reddit (bdfr) three times for the given
# subreddit(s): top of all time, top of the year, and top of the month
# (limit 400). Callers filter the output for lines starting with "http".
#
# Arguments: passed through verbatim to bdfr's --subreddit option.
#
# Side effects: changes the working directory to the bdfr checkout and
# activates its pipenv virtualenv in the current shell.
# Returns 1 without running bdfr when the checkout or its virtualenv is missing.
function reddit-links
    # maybe there is a better tool to do this?
    # requires making some changes to the code
    # see here before using: https://github.com/aliparlakci/bulk-downloader-for-reddit/issues/627
    cd ~/github/o/bulk-downloader-for-reddit/; or return 1

    # Resolve the venv separately: with an empty result the activate script
    # path would collapse to /bin/activate.fish and bdfr would run outside
    # its virtualenv.
    set -l venv_dir (pipenv --venv); or return 1
    test -n "$venv_dir"; or return 1
    source $venv_dir/bin/activate.fish; or return 1

    python -m bdfr download bdfr/ --disable-module SelfPost -S top --subreddit $argv
    python -m bdfr download bdfr/ --disable-module SelfPost -S top -t year --subreddit $argv
    python -m bdfr download bdfr/ --disable-module SelfPost -S top -t month -L 400 --subreddit $argv
end
# Re-scrape links for every subreddit listed in a folder's list file.
#
# Arguments:
#   folder  folder name; subreddits are read from ~/mc/<folder>-reddit.txt
#           (one per line)
#
# All names are joined with ", " and passed to `reddit-links` as a single
# argument. Lines of its output that start with "http" are appended to
# ~/.jobs/reddit/todo/<folder> after stripping playlist parameters and
# blanking YouTube search-result URLs.
#
# Returns 1 when the list file is missing or empty.
function reddit-links-update --argument folder
    # The tilde must stay outside quotes, otherwise fish keeps it as a literal "~".
    set -l curatifile ~/mc/{$folder}-reddit.txt
    if not test -s $curatifile
        echo "reddit-links-update: $curatifile is missing or empty" >&2
        return 1
    end

    set -l subr (string join ', ' <$curatifile)
    reddit-links "$subr" | grep '^http' | sed -e 's|&list.*||' -e 's|\?list.*||' -e 's|.*youtube.*results.*||' >>~/.jobs/reddit/todo/$folder
end
# For every file in ~/.jobs/reddit/todo/, print the folder name followed by
# the number of its URLs that are not yet listed in the matching
# ~/.jobs/reddit/done/ file.
function redditstatus
    for todo_path in ~/.jobs/reddit/todo/*
        set -l name (basename $todo_path)
        # Name and count share one output line: printf emits no newline, wc does.
        printf '%s \t' $name
        combine ~/.jobs/reddit/todo/$name not ~/.jobs/reddit/done/$name | wc -l
    end
end
# Base yt-dlp invocation shared by the wrappers in this file.
# Fixed options: quiet output, cookies taken from Firefox, continue on errors
# (-i), files written as "<uploader>/<title, max 200 bytes> [<id>].<ext>", a
# download archive at ~/.local/share/yt_archive.txt, 13 retries for both
# downloads and extractors, and a title filter that rejects trailers,
# previews and similar. Any arguments are appended after the fixed options.
function _ytdl
    set -l output_template '%(uploader)s/%(title).200B [%(id)s].%(ext)s'
    set -l rejected_titles 'Trailer|Preview|Teaser|Promo|Live Stream|Crypto| Meetup| Montage|Bitcoin|Makeup|Apology|Clip'

    yt-dlp --quiet --cookies-from-browser firefox -i \
        -o $output_template \
        --download-archive ~/.local/share/yt_archive.txt \
        --retries 13 --extractor-retries 13 \
        --reject-title $rejected_titles \
        $argv
end
# Run _ytdl with the YouTube DASH and HLS manifests skipped; if that attempt
# fails, run _ytdl once more with only the caller's arguments.
# The exit status is that of the last _ytdl call made.
function ytdl
    if not _ytdl --youtube-skip-dash-manifest --youtube-skip-hls-manifest $argv
        _ytdl $argv
    end
end
# Download the given URLs through ytdl at no more than 576p, writing manual
# and automatic English subtitles in the first available of
# srt/sub/ssa/vtt/ass. Arguments are placed after "--", so they are always
# treated as URLs and never as options.
function yt
    # Quoted so the brackets and "<" are plainly part of the format selector.
    set -l format_selector 'bestvideo[height<=576]+bestaudio/best[height<=576]'
    set -l english_variants en,EN,eng,ENG,en-gb,en-us,en-GB,en-US,EN-GB,EN-US,english,English,ENGLISH,en-CA,en-IE

    ytdl -f $format_selector --write-sub --write-auto-sub \
        --sub-lang $english_variants \
        --sub-format srt/sub/ssa/vtt/ass/best -- $argv
end
# Tab completion for the commands above.
# First rule: offer the directory names found under ~/d/ as the folder
# argument. $HOME is expanded when this line is loaded (it sits inside double
# quotes), so the sed expression strips the current user's own ~/d/ prefix and
# the trailing slash.
# Second rule: offer every line of the ~/mc/*reddit.txt list files.
complete -f -k -c reddit-links-update -c reddit-add -c reddit-get-sounds -c reddit-get-videos -c reddit-get-photos -a "(__fish_complete_directories ~/d/ DFOLDER | sed 's|$HOME/d/\(.*\)/|\1|' )"
complete -f -k -c reddit-add -c reddit-get-sounds -c reddit-get-videos -c reddit-get-photos -a "(cat ~/mc/*reddit.txt)"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment