Skip to content

Instantly share code, notes, and snippets.

@branchmispredictor
Created January 7, 2024 18:06
Show Gist options
  • Save branchmispredictor/6bfb0c4f6967d97e514a926a555866fe to your computer and use it in GitHub Desktop.
Save branchmispredictor/6bfb0c4f6967d97e514a926a555866fe to your computer and use it in GitHub Desktop.
Youtube Archiver Script
#!/bin/bash
# Youtube Archiver v0.1.0
#
# Requires `yt-dlp` and `jq` to be installed and on $PATH
#
# Downloads videos into `./archive` and rechecks them within 30 days because
# it may take YT some time to encode the highest quality version of a video.
# Any replaced videos are moved into `./replaced`.
# NOTE: Script depends on this specific folder and file naming format, do not change!
# Strict mode: abort on unhandled errors, on unset variables, and when any
# stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Absolute path of the directory containing this script; the archive and
# replaced folders live next to it.
SCRIPT_DIR="$(
  cd -- "$(dirname -- "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd
)"
ARCHIVE_DIR="${SCRIPT_DIR}/archive"
REPLACED_DIR="${SCRIPT_DIR}/replaced"
#######################################
# Runs yt-dlp with the archiver's fixed option set, followed by whatever the
# caller passes in (extra options and/or URLs).
# Arguments: "$@" - appended verbatim after the fixed options
# Returns:   the exit status of yt-dlp
#######################################
function yt_dlp_with_args() {
  # Output layout: one folder per uploader, one sub-folder per video.
  local -r output_template='%(uploader)s/%(uploader)s - %(upload_date)s - %(title)s/%(uploader)s - %(upload_date)s - %(title)s [%(id)s].%(ext)s'

  local -a base_opts=(
    --verbose
    --force-ipv4
    # Request pacing.
    --sleep-requests 1
    --sleep-interval 5
    --max-sleep-interval 30
    --no-continue
    --no-overwrites
    # Metadata and side-car files.
    --add-metadata
    --parse-metadata '%(title)s:%(meta_title)s'
    --parse-metadata '%(uploader)s:%(meta_artist)s'
    --write-description
    --write-info-json
    --write-annotations
    --write-thumbnail
    --embed-thumbnail
    --all-subs
    --embed-subs
    # Format selection and download behaviour.
    --check-formats
    --concurrent-fragments 3
    --match-filter '!is_live & !live'
    --output "$output_template"
    --merge-output-format 'mkv'
    --mtime
  )

  yt-dlp "${base_opts[@]}" "$@"
}
#######################################
# Downloads every channel listed in channels.txt into the archive directory,
# recording finished videos in archive.log and mirroring all yt-dlp output
# into output.log (both inside the archive directory).
# Globals:   ARCHIVE_DIR (read), SCRIPT_DIR (read)
# Arguments: none
# Outputs:   yt-dlp output (stdout and stderr merged) on stdout
# Returns:   0 on success; non-zero if the archive directory cannot be
#            entered or the download pipeline fails
#######################################
function archive_channels() {
  local rc=0

  # The caller invokes this function as `archive_channels || ...`, which
  # suspends `set -e` for the whole body, so every step is checked explicitly.
  mkdir -p -- "$ARCHIVE_DIR" || return 1
  pushd "$ARCHIVE_DIR" > /dev/null || return 1
  printf "\n[ARCHIVE CHANNELS]\n\n"

  # With `pipefail` (enabled at the top of the script) a yt-dlp failure is
  # visible here even though `tee` is the last stage of the pipeline.
  yt_dlp_with_args \
    --download-archive archive.log \
    --batch-file "$SCRIPT_DIR/channels.txt" \
    2>&1 | tee output.log || rc=$?

  popd > /dev/null || return 1

  # Report the download result rather than the status of `popd`.
  return "$rc"
}
#######################################
# Re-checks recently modified videos in the archive. For each one, compares
# the format id stored in its .info.json with the format id yt-dlp reports
# now; when they differ, moves the video's folder under the replaced
# directory (suffixing the destination with today's date) and downloads it
# again.
# Globals:   ARCHIVE_DIR (read), REPLACED_DIR (read)
# Arguments: none
# Outputs:   progress on stdout, warnings on stderr
# Returns:   0 if every recent video could be checked and, where needed,
#            replaced; 1 if any step failed for at least one video or the
#            archive directory could not be entered
#######################################
function refresh_recent_videos() {
  local rc=0
  local recent_vid recent_folder recent_prefix recent_json_file
  local current_format_id url new_format_id replacement_folder

  # The caller invokes this function as `refresh_recent_videos || ...`, which
  # suspends `set -e` for the whole body, so every step is checked explicitly.
  pushd "$ARCHIVE_DIR" > /dev/null || return 1
  printf "\n[REFRESH RECENT VIDEOS]\n\n"

  # Select .mkv files whose modification time is less than 30 days old.
  # The list is read on fd 3 so that commands run inside the loop cannot
  # swallow part of it through stdin, and the loop runs in the current shell
  # so `rc` survives it.
  while IFS= read -r -d '' -u 3 recent_vid; do
    recent_folder=$(dirname -- "$recent_vid")
    recent_prefix=$(basename -- "$recent_vid" ".mkv")
    recent_json_file="$recent_folder/$recent_prefix.info.json"

    if [[ ! -f "$recent_json_file" ]]; then
      echo "Skipping $recent_vid: missing $recent_json_file" >&2
      rc=1
      continue
    fi

    if ! current_format_id=$(jq -r '.format_id' "$recent_json_file") \
      || ! url=$(jq -r '.webpage_url' "$recent_json_file") \
      || [[ -z "$url" || "$url" == "null" ]]; then
      echo "Skipping $recent_vid: cannot read format_id/webpage_url from $recent_json_file" >&2
      rc=1
      continue
    fi

    # If the lookup fails (network error, video removed, ...) the answer is
    # unknown, not "format changed": keep the existing download in place.
    if ! new_format_id=$(yt-dlp --print format_id "$url") \
      || [[ -z "$new_format_id" ]]; then
      echo "Skipping $recent_vid: could not determine current format for $url" >&2
      rc=1
      continue
    fi

    if [[ "$current_format_id" != "$new_format_id" ]]; then
      echo "Need to reconcile video at $recent_folder: new format: $new_format_id, old format: $current_format_id"
      replacement_folder="$REPLACED_DIR/$recent_folder-$(date +%Y%m%d)"
      if ! mkdir -p -- "$replacement_folder" \
        || ! mv -- "$recent_folder" "$replacement_folder"; then
        echo "Could not move $recent_folder to $replacement_folder; not re-downloading" >&2
        rc=1
        continue
      fi
      yt_dlp_with_args "$url" || rc=1
    fi
  done 3< <(find . -name "*.mkv" -mtime -30 -print0)

  popd > /dev/null || return 1

  # Report the per-video results rather than the status of `popd`.
  return "$rc"
}
# Run both jobs unconditionally, remember which ones failed, print a summary,
# and exit non-zero if either job failed.
archive_channels_failed="0"
refresh_recent_videos_failed="0"

if ! archive_channels; then
  archive_channels_failed="1"
fi
if ! refresh_recent_videos; then
  refresh_recent_videos_failed="1"
fi

printf '%s\n' \
  "Jobs status:" \
  "archive_channels_failed: $archive_channels_failed" \
  "refresh_recent_videos: $refresh_recent_videos_failed"

if [[ "$archive_channels_failed" == "1" || "$refresh_recent_videos_failed" == "1" ]]; then
  exit 1
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment