# yt-dlp tracking errors example
# requires fish shell, moreutils (combine, sponge), yt-dlp, gallery-dl,
# and bulk-downloader-for-reddit (bdfr)
# usage:
# 1. Create a file per folder listing the subreddits you want to archive, one subreddit per line:
#    echo "Wellington" >> ~/mc/WellingtonFolder-reddit.txt
#    or use `reddit-add WellingtonFolder Wellington`
# 2. Run `reddit-links-update WellingtonFolder` to scrape links
# 3. Run `reddit-get-videos WellingtonFolder` to start yt-dlp
#    (or `reddit-get-photos WellingtonFolder` to try gallery-dl first, with yt-dlp as a fallback)
# UnRecoverable Errors: a URL whose yt-dlp output matches one of these patterns is
# treated as permanently failed and recorded as done so it is never retried
function ytUREs
string replace --all "
" "|" "repetitive or misleading metadata
has already been recorded in the archive
ideo.*is private
Unable to extract cnn url
PornHd.*Unable to extract error message
You don't have permission to access this video.
Video is unavailable pending review
Video has been flagged for verification
This video has been disabled\$
The uploader has not made this video available.\$
This video is DRM protected
This video is protected by a password
This video requires payment to watch.\$
Unable to download webpage
dashboard-only post
This video is only available to Music Premium members
The policy key provided does not permit this account or video
live stream recording
nudity or sexual content
policy on harassment and bullying
stream.* is offline\$
: Video unavailable\$
does not exist.\$
has been removed\$
Premieres in.*hours\$
This clip is no longer available\$
No media found\$
No sources found for video
Video unavailable. This video is not available\$
Resource temporarily unavailable
This video is unavailable
Unsupported URL
URL could be a direct video link
not a valid URL
not a video\$
The page doesn't contain any tracks
removed by the uploader
blocked it on copyright grounds\$
uploader has closed their.*account
account has been terminated because we received multiple
policy on violent or graphic content\$
The channel does not have a .* tab\$
policy on spam, deceptive practices, and scams\$
This video does not exist, or has been deleted.
Community Guidelines
Terms of Service
This channel does not exist
account associated with this video
This video doesn't exist.\$
Track not found\$
Not found.\$
Can't find object media for
o video formats found
o video in
o video on
certificate is not valid
CERTIFICATE_VERIFY_FAILED
HTTP Error 403: Forbidden\$
code -404
HTTP Error 404
HTTPError 404
HTTP Error 410
HTTPError 410"
end
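# example (sketch, not part of the workflow): the newline-separated list above becomes a
# single extended-regex alternation, so one grep call can classify a made-up log line:
# echo "ERROR: HTTP Error 404: Not Found" | grep -qE (ytUREs); and echo unrecoverable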
# Recoverable Errors: treated as retryable (rate limits, geo restrictions, login walls,
# network trouble); when one matches, the run stops and the URL stays in the todo list
function ytREs
string replace --all "
" "|" "due to geo restriction
HTTP Error 429
read operation timed out
Internal Server Error
Internal error encountered
Playlists that require authentication may not extract correctly without a successful webpage download
Main webpage is locked behind the login page
You need to log in to access this content
This video is only available for registered users
Could not send HEAD request
Unable to download JSON metadata
Failed to parse JSON Expecting
expected string or bytes-like object
in your country
geolocation
Connection refused
giving up after.*retries
Failed to download MPD manifest:\$
not currently available\$
copyright claim"
end
# Ignored errors: progress output and known noise that doesn't affect how a URL is handled.
# Used for checking if there are new errors to catch:
# grep -Ev (ytREs) ~/.jobs/ytdlp_errors.txt | grep -Ev (ytUREs) | grep -Ev (ytIEs)
function ytIEs
string replace --all "
" "|" "hidden
Traceback
KeyboardInterrupt
Fatal Python error
list index out of range
Extract.* cookies
File .*, line .*, in
Requested format is not available.
fragment_filename_sanitized
no suitable InfoExtractor for URL
No such file or directory
: Downloading webpage\$
: Extracting information\$
: Requesting header\$
Downloading .* metadata\$
Downloading .* information\$
Downloading .* manifest\$
Determining source extension\$
Downloading jwt token\$
^\[info\]
^\[redirect\]
^\[Merger\]
^\[dashsegments\]
Finished downloading playlist
The last 30x error message was:
^Found\$
NoneType
Creating a generic title instead
The channel is not currently live
clips are not currently supported.
Join this channel to get access to members-only content
Confirm you are on the latest version using
referenced before assignment
field is missing or empty
list index out of range
Interrupted by user
unable to open for writing:
You might want to use a VPN or a proxy server
maximum recursion depth exceeded
object does not support item assignment
encodings are not supported
object has no attribute
merged
Compressed file ended before the end-of-stream marker was reached
Falling back on generic
Some formats are possibly damaged
matching opening tag for closing p tag not found
the JSON object must be str, bytes or bytearray, not dict
The read operation timed out
Unable to recognize playlist.
Premieres in"
end
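# sketch (not in the original gist): wrap the "new errors to catch" check from the comment
# above ytIEs in a helper; the default log path is an assumption -- point it at whatever
# file you tee yt-dlp output into (e.g. a log under ~/.jobs/reddit/done/ytdlp/)
function yt-uncaught-errors --argument logfile
    if test -z "$logfile"
        set logfile ~/.jobs/ytdlp_errors.txt
    end
    # anything matching none of the three classes is a candidate for a new pattern
    grep -Ev (ytREs) $logfile | grep -Ev (ytUREs) | grep -Ev (ytIEs)
end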
function reddit-get-videos --argument folder
    cd ~/d/$folder/unsorted/
    # URLs that finished (or failed permanently) are recorded here and skipped next run
    set error_archive ~/.jobs/reddit/done/$folder
    for url in (combine ~/.jobs/reddit/todo/$folder not ~/.jobs/reddit/done/$folder | sort --unique --ignore-case --random-sort)
        yt $url &| tee -a ~/.jobs/reddit/done/ytdlp/$folder | read -z ytout
        if test -z "$ytout" -o $pipestatus[1] -eq 0
            # no news is good news
            echo "$url" >>"$error_archive"
        else if echo "$ytout" | grep -qE (ytREs)
            # recoverable error matched: stop and leave the URL in the todo list
            return
        else if echo "$ytout" | grep -qE (ytUREs)
            # unrecoverable error matched: record the URL so it is not retried
            echo "$url" >>"$error_archive"
        else
            # unrecognized output: print it so a new pattern can be added above
            echo "$url"
            echo "$ytout"
            echo "wtf is this ???"
        end
    end
    # reduce the size of ~/.jobs/reddit/todo/$folder
    combine ~/.jobs/reddit/todo/$folder not ~/.jobs/reddit/done/$folder | sponge ~/.jobs/reddit/todo/$folder
end
function reddit-get-photos --argument folder
    cd ~/d/$folder/unsorted/
    # URLs that finished (or failed permanently) are recorded here and skipped next run
    set error_archive ~/.jobs/reddit/done/$folder
    for url in (combine ~/.jobs/reddit/todo/$folder not ~/.jobs/reddit/done/$folder | sort --unique --ignore-case --random-sort)
        gallery-dl --quiet --download-archive $HOME/.local/share/gallerydl.sqlite3 $url
        if test $status -eq 0 -o $status -eq 4 -o $status -eq 8
            echo $url >>~/.jobs/reddit/done/$folder
        else
            # gallery-dl could not handle it; fall back to yt-dlp
            yt $url &| tee -a ~/.jobs/reddit/done/ytdlp/$folder | read -z ytout
            if test -z "$ytout" -o $pipestatus[1] -eq 0
                # no news is good news
                echo "$url" >>"$error_archive"
            else if echo "$ytout" | grep -qE (ytREs)
                # recoverable error matched: stop and leave the URL in the todo list
                return
            else if echo "$ytout" | grep -qE (ytUREs)
                # unrecoverable error matched: record the URL so it is not retried
                echo "$url" >>"$error_archive"
            else
                # unrecognized output: print it so a new pattern can be added above
                echo "$url"
                echo "$ytout"
                echo "wtf is this ???"
            end
        end
    end
    # reduce the size of ~/.jobs/reddit/todo/$folder
    combine ~/.jobs/reddit/todo/$folder not ~/.jobs/reddit/done/$folder | sponge ~/.jobs/reddit/todo/$folder
end
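# convenience sketch (not in the original gist): run reddit-get-videos for every folder
# that has a todo list; swap in reddit-get-photos for image-oriented folders
function reddit-get-all
    for todo in ~/.jobs/reddit/todo/*
        reddit-get-videos (basename $todo)
    end
end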
# you will need to create the initial done files for each of your folders:
for folder in WellingtonFolder 95_Inspiration 81_New_Music 71_Mealtime_Videos
    touch ~/.jobs/reddit/done/$folder
end
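# fuller setup sketch (assumption -- adjust paths to taste, not in the original gist):
# create everything a new folder needs before its first run
function reddit-init --argument folder
    mkdir -p ~/mc ~/d/$folder/unsorted ~/.jobs/reddit/todo ~/.jobs/reddit/done/ytdlp
    touch ~/mc/$folder-reddit.txt ~/.jobs/reddit/todo/$folder ~/.jobs/reddit/done/$folder ~/.jobs/reddit/done/ytdlp/$folder
end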
function reddit-add --argument folder subr
    # curation file listing the subreddits archived into this folder
    # (note: tilde must be unquoted so fish expands it)
    set curatifile ~/mc/$folder-reddit.txt
    set dfolder ~/d/$folder/unsorted/
    if not test (count $argv) -eq 2
        return 5
    end
    if grep -qEix -- "$subr" $curatifile
        # subreddit is already being tracked
        return 1
    end
    if not test -d $dfolder
        # destination folder does not exist
        return 2
    end
    echo $subr >>$curatifile
    reddit-links "$subr" | grep '^http' | sed -e 's|&list.*||' -e 's|\?list.*||' >>~/.jobs/reddit/todo/$folder
end
function reddit-links
    # maybe there is a better tool to do this?
    # requires making some changes to the bdfr code
    # see here before using: https://github.com/aliparlakci/bulk-downloader-for-reddit/issues/627
    cd ~/github/o/bulk-downloader-for-reddit/
    source (pipenv --venv)/bin/activate.fish
    python -m bdfr download bdfr/ --disable-module SelfPost -S top --subreddit $argv
    python -m bdfr download bdfr/ --disable-module SelfPost -S top -t year --subreddit $argv
    python -m bdfr download bdfr/ --disable-module SelfPost -S top -t month -L 400 --subreddit $argv
end
function reddit-links-update --argument folder
    set curatifile ~/mc/$folder-reddit.txt
    # pass all tracked subreddits to bdfr in one invocation
    set subr (cat $curatifile | string join ', ')
    reddit-links "$subr" | grep '^http' | sed -e 's|&list.*||' -e 's|\?list.*||' -e 's|.*youtube.*results.*||' >>~/.jobs/reddit/todo/$folder
end
function redditstatus
    # per-folder count of URLs still waiting to be downloaded
    for folder in ~/.jobs/reddit/todo/*
        set folder (basename $folder)
        printf '%s \t' $folder
        combine ~/.jobs/reddit/todo/$folder not ~/.jobs/reddit/done/$folder | wc -l
    end
end
function _ytdl
    # shared yt-dlp invocation: Firefox cookies, global download archive, and a
    # reject list for titles that are not worth archiving
    yt-dlp --quiet --cookies-from-browser firefox -i -o "%(uploader)s/%(title).200B [%(id)s].%(ext)s" \
        --download-archive ~/.local/share/yt_archive.txt --retries 13 --extractor-retries 13 \
        --reject-title "Trailer|Preview|Teaser|Promo|Live Stream|Crypto| Meetup| Montage|Bitcoin|Makeup|Apology|Clip" $argv
end
function ytdl
    # try the faster extraction that skips DASH/HLS manifests first,
    # then fall back to a full extraction if that fails
    _ytdl --youtube-skip-dash-manifest --youtube-skip-hls-manifest $argv
    or _ytdl $argv
end
function yt
    # cap video height at 576p and grab English subtitles when available
    # (the format string must be quoted so fish does not parse < as a redirection)
    ytdl -f 'bestvideo[height<=576]+bestaudio/best[height<=576]' --write-sub --write-auto-sub \
        --sub-lang en,EN,eng,ENG,en-gb,en-us,en-GB,en-US,EN-GB,EN-US,english,English,ENGLISH,en-CA,en-IE \
        --sub-format srt/sub/ssa/vtt/ass/best -- $argv
end
complete -f -k -c reddit-links-update -c reddit-add -c reddit-get-sounds -c reddit-get-videos -c reddit-get-photos -a "(__fish_complete_directories ~/d/ DFOLDER | sed 's|'$HOME'/d/\(.*\)/|\1|' )"
complete -f -k -c reddit-add -c reddit-get-sounds -c reddit-get-videos -c reddit-get-photos -a "(cat ~/mc/*reddit.txt)"