kylemsguy/dl-livechat.sh

## dl-livechat.sh
#!/bin/bash

# NOTE: this is a very quick and dirty script. It saves the files as the <video id>.json
# You will need to map this back to the actual video if you want sane filenames
# TODO: merge this back to the main script and make sure the script can be run to make a partial download...

channel="Rosemi_Lovelock"
stream_list_ids="streams.txt"

# Per-run log files: stdout only, stderr only, and both streams combined.
stdout_log="${channel}-livechat.log"
stderr_log="${channel}-livechat-stderr.log"
combined_log="${channel}-livechat-log-all.log"

# Enable this if you did not previously download the streams with the --download-archive flag set
#yt-dlp --skip-download --force-write-archive --download-archive "$stream_list_ids" "https://www.youtube.com/@${channel}/streams"

if [[ ! -r "$stream_list_ids" ]]; then
  printf 'error: cannot read archive file: %s\n' "$stream_list_ids" >&2
  exit 1
fi

# The --output path below points into livechat/, so make sure it exists.
mkdir -p livechat || exit 1

# Start every run with empty logs; the loop appends so that the output of all
# videos is kept (a plain ">" inside the loop would keep only the last one).
: > "$stdout_log"
: > "$stderr_log"
: > "$combined_log"

# Archive lines look like "youtube <video id>"; a line holding only an ID is
# accepted too. The "|| [[ -n ... ]]" part handles a missing final newline.
while read -r extractor video_id _ || [[ -n "$extractor" ]]; do
  if [[ -z "$video_id" ]]; then
    video_id=$extractor
  fi
  if [[ -z "$video_id" || "$video_id" == "youtube" ]]; then
    continue
  fi

  # stdout and stderr each go to their own log and are also copied into the
  # combined log. stdin is detached so the downloader cannot consume the
  # archive file that feeds this loop. A failed download is reported and the
  # loop moves on to the next video.
  if ! chat_downloader "https://youtube.com/watch?v=${video_id}" \
    --output "livechat/${video_id}.json" \
    < /dev/null \
    > >(tee -a "$stdout_log" >> "$combined_log") \
    2> >(tee -a "$stderr_log" >> "$combined_log"); then
    printf 'warning: chat download failed for %s\n' "$video_id" >&2
  fi
done < "$stream_list_ids"

## dl-mainyt.sh
#!/bin/sh
# Attempts to archive a YouTube channel while keeping the video files organized.
# Variables (configure to your liking)
channelname="Rosemi_Lovelock" # Replace with the channel's username.
outputfilename='%(upload_date)s-%(title)s-%(id)s.%(ext)s'
# A quoted "~" is never expanded by the shell, so build the path from $HOME.
cookiepath="${HOME}/youtube.com_cookies.txt"

# Actual downloads. Enable what you need, and comment out what you don't need.
# Each command records finished IDs in its own --download-archive file.
# Members content (does not include posts, just members streams)
yt-dlp --cookies "$cookiepath" --embed-thumbnail --embed-metadata --sub-langs all \
  --download-archive "members.txt" \
  "https://www.youtube.com/@${channelname}/membership" \
  -o "members/${outputfilename}"
# Shorts
yt-dlp --embed-thumbnail --embed-metadata --sub-langs all \
  --download-archive "shorts.txt" \
  "https://www.youtube.com/@${channelname}/shorts" \
  -o "shorts/${outputfilename}"
# Videos
yt-dlp --embed-thumbnail --embed-metadata --sub-langs all \
  --download-archive "videos.txt" \
  "https://www.youtube.com/@${channelname}/videos" \
  -o "videos/${outputfilename}"
# Livestreams (Not sure if live-chat is actually downloaded correctly with this)
yt-dlp --embed-thumbnail --embed-metadata --sub-langs all \
  --download-archive "streams.txt" \
  "https://www.youtube.com/@${channelname}/streams" \
  -o "streams/${outputfilename}"
# Playlists (can be skipped, if not needed)
#yt-dlp --embed-thumbnail --embed-metadata --sub-langs all --download-archive "playlists.txt" "https://www.youtube.com/@${channelname}/playlists" -o "playlists/${outputfilename}"
# Community tab (doesn't include community posts, probably not useful at all)
#yt-dlp --embed-thumbnail --embed-metadata --sub-langs all --download-archive "community.txt" "https://www.youtube.com/@${channelname}/community" -o "community/${outputfilename}"
# Metadata as JSON
yt-dlp --cookies "$cookiepath" --skip-download --write-comments --write-info-json --sub-langs all \
  "https://www.youtube.com/@${channelname}" \
  -o "json/${outputfilename}"

## dl-twitter.sh
#!/bin/sh

# Quick and dirty script using gallery-dl to attempt to archive a twitter account

# Account to archive (handle without the leading "@").
twitter="iamdevloper"

# URLs are quoted so that "?" in the search URL is never treated as a glob.
gallery-dl "https://twitter.com/${twitter}" --write-metadata -o skip=true
gallery-dl "https://twitter.com/${twitter}/media" --write-metadata -o skip=true
# Search results are filed under twitter/<account>. The account name must be
# passed literally ("${twitter}"); writing "{$twitter}" hands gallery-dl a
# replacement field named after the account instead of the name itself.
gallery-dl "https://twitter.com/search?q=from:${twitter}" --write-metadata -o skip=true \
  -o "directory=[\"twitter\",\"${twitter}\"]"

## rename_livechat.py
# To be used with dl-livechat.sh, together with the downloaded JSON metadata for each video (only the
# filenames are used; this could be rewritten to use the video files themselves), to give the live-chat
# files more palatable names.
# Either way, the filenames are expected to follow outputfilename='%(upload_date)s-%(title)s-%(id)s.%(ext)s'

import os
import sys
import shutil
import re

def list_json():
    """Return the names of all entries in the ``./json`` directory."""
    metadata_dir = "./json"
    entries = os.listdir(metadata_dir)
    return entries

def list_livechat():
    """Return the names of all entries in the ``./livechat`` directory."""
    chat_dir = "./livechat"
    entries = os.listdir(chat_dir)
    return entries

def get_id_dict(json_list):
    """Map each video id to its "<date>-<title>" prefix.

    Args:
        json_list: iterable of metadata filenames; each one is parsed with
            ``split_name``.

    Returns:
        dict mapping the video id to the first component returned by
        ``split_name``. Filenames that ``split_name`` rejects are reported on
        stderr and left out.
    """
    mapping = {}
    for name in json_list:
        try:
            title, vid = split_name(name)
        except ValueError:
            print(f"{name} is not for a video or stream. Skipping.", file=sys.stderr)
            # Skip the assignment below: without this, the previous entry's
            # title/vid would be stored again (or be unbound on the first file).
            continue
        mapping[vid] = title
    return mapping

def split_name(filename):
    """Split a metadata filename into its "<date>-<title>" prefix and video id.

    Returns:
        A ``(prefix, video_id)`` tuple.

    Raises:
        ValueError: if the filename does not match the expected pattern.
    """
    # Will need updating if YouTube ever adds another character to the video id
    pattern = r"(\d+-.+?)-(.{11})\.info\.json"
    found = re.match(pattern, filename)
    if found is None:
        raise ValueError("Filename doesn't match")
    prefix, video_id = found.groups()
    return prefix, video_id

def livechat_get_id(livechat_name):
    """Return the filename with its final ``.``-separated component removed.

    A name without any dot yields an empty string.
    """
    # very fragile but should work?
    stem, _dot, _extension = livechat_name.rpartition(".")
    return stem

def rename_files(livechat, mapping):
    """Copy live-chat files into ``new_livechat/`` under descriptive names.

    Args:
        livechat: iterable of filenames located in ``livechat/``.
        mapping: dict from video id to the filename prefix to use.

    Each file is copied to ``new_livechat/<prefix>-<id>.livechat.json``; the
    originals are left untouched. Files whose id is missing from ``mapping``
    are reported on stderr and skipped instead of aborting the whole run.
    """
    # copyfile does not create missing parent directories.
    os.makedirs("new_livechat", exist_ok=True)
    for i in livechat:
        vid = livechat_get_id(i)
        if vid not in mapping:
            print(f"No metadata found for {i}. Skipping.", file=sys.stderr)
            continue
        new_filename = f"{mapping[vid]}-{vid}.livechat.json"
        src = f"livechat/{i}"
        target = f"new_livechat/{new_filename}"
        print(f"Copying {src} to {target}")
        shutil.copyfile(src, target)

if __name__ == "__main__":
    # Read both directory listings first, then derive the id -> prefix table.
    metadata_names = list_json()
    chat_names = list_livechat()
    id_to_prefix = get_id_dict(metadata_names)

    # Show the table before any file is copied.
    print(id_to_prefix)

    rename_files(chat_names, id_to_prefix)
	#!/bin/bash

	# NOTE: this is a very quick and dirty script. It saves the files as the <video id>.json
	# You will need to map this back to the actual video if you want sane filenames
	# TODO: merge this back to the main script and make sure the script can be run to make a partial download...

	channel="Rosemi_Lovelock"
	stream_list_ids="streams.txt"

	# Per-run log files: stdout only, stderr only, and both streams combined.
	stdout_log="${channel}-livechat.log"
	stderr_log="${channel}-livechat-stderr.log"
	combined_log="${channel}-livechat-log-all.log"

	# Enable this if you did not previously download the streams with the --download-archive flag set
	#yt-dlp --skip-download --force-write-archive --download-archive "$stream_list_ids" "https://www.youtube.com/@${channel}/streams"

	if [[ ! -r "$stream_list_ids" ]]; then
	  printf 'error: cannot read archive file: %s\n' "$stream_list_ids" >&2
	  exit 1
	fi

	# The --output path below points into livechat/, so make sure it exists.
	mkdir -p livechat || exit 1

	# Start every run with empty logs; the loop appends so that the output of all
	# videos is kept (a plain ">" inside the loop would keep only the last one).
	: > "$stdout_log"
	: > "$stderr_log"
	: > "$combined_log"

	# Archive lines look like "youtube <video id>"; a line holding only an ID is
	# accepted too. The "|| [[ -n ... ]]" part handles a missing final newline.
	while read -r extractor video_id _ || [[ -n "$extractor" ]]; do
	  if [[ -z "$video_id" ]]; then
	    video_id=$extractor
	  fi
	  if [[ -z "$video_id" || "$video_id" == "youtube" ]]; then
	    continue
	  fi

	  # stdout and stderr each go to their own log and are also copied into the
	  # combined log. stdin is detached so the downloader cannot consume the
	  # archive file that feeds this loop. A failed download is reported and the
	  # loop moves on to the next video.
	  if ! chat_downloader "https://youtube.com/watch?v=${video_id}" \
	    --output "livechat/${video_id}.json" \
	    < /dev/null \
	    > >(tee -a "$stdout_log" >> "$combined_log") \
	    2> >(tee -a "$stderr_log" >> "$combined_log"); then
	    printf 'warning: chat download failed for %s\n' "$video_id" >&2
	  fi
	done < "$stream_list_ids"
	#!/bin/sh
	# Attempts to archive a YouTube channel while keeping the video files organized.
	# Variables (configure to your liking)
	channelname="Rosemi_Lovelock" # Replace with the channel's username.
	outputfilename='%(upload_date)s-%(title)s-%(id)s.%(ext)s'
	# A quoted "~" is never expanded by the shell, so build the path from $HOME.
	cookiepath="${HOME}/youtube.com_cookies.txt"

	# Actual downloads. Enable what you need, and comment out what you don't need.
	# Each command records finished IDs in its own --download-archive file.
	# Members content (does not include posts, just members streams)
	yt-dlp --cookies "$cookiepath" --embed-thumbnail --embed-metadata --sub-langs all \
	  --download-archive "members.txt" \
	  "https://www.youtube.com/@${channelname}/membership" \
	  -o "members/${outputfilename}"
	# Shorts
	yt-dlp --embed-thumbnail --embed-metadata --sub-langs all \
	  --download-archive "shorts.txt" \
	  "https://www.youtube.com/@${channelname}/shorts" \
	  -o "shorts/${outputfilename}"
	# Videos
	yt-dlp --embed-thumbnail --embed-metadata --sub-langs all \
	  --download-archive "videos.txt" \
	  "https://www.youtube.com/@${channelname}/videos" \
	  -o "videos/${outputfilename}"
	# Livestreams (Not sure if live-chat is actually downloaded correctly with this)
	yt-dlp --embed-thumbnail --embed-metadata --sub-langs all \
	  --download-archive "streams.txt" \
	  "https://www.youtube.com/@${channelname}/streams" \
	  -o "streams/${outputfilename}"
	# Playlists (can be skipped, if not needed)
	#yt-dlp --embed-thumbnail --embed-metadata --sub-langs all --download-archive "playlists.txt" "https://www.youtube.com/@${channelname}/playlists" -o "playlists/${outputfilename}"
	# Community tab (doesn't include community posts, probably not useful at all)
	#yt-dlp --embed-thumbnail --embed-metadata --sub-langs all --download-archive "community.txt" "https://www.youtube.com/@${channelname}/community" -o "community/${outputfilename}"
	# Metadata as JSON
	yt-dlp --cookies "$cookiepath" --skip-download --write-comments --write-info-json --sub-langs all \
	  "https://www.youtube.com/@${channelname}" \
	  -o "json/${outputfilename}"
	#!/bin/sh

	# Quick and dirty script using gallery-dl to attempt to archive a twitter account

	# Account to archive (handle without the leading "@").
	twitter="iamdevloper"

	# URLs are quoted so that "?" in the search URL is never treated as a glob.
	gallery-dl "https://twitter.com/${twitter}" --write-metadata -o skip=true
	gallery-dl "https://twitter.com/${twitter}/media" --write-metadata -o skip=true
	# Search results are filed under twitter/<account>. The account name must be
	# passed literally ("${twitter}"); writing "{$twitter}" hands gallery-dl a
	# replacement field named after the account instead of the name itself.
	gallery-dl "https://twitter.com/search?q=from:${twitter}" --write-metadata -o skip=true \
	  -o "directory=[\"twitter\",\"${twitter}\"]"
	# To be used with dl-livechat.sh, together with the downloaded JSON metadata for each video (only the
	# filenames are used; this could be rewritten to use the video files themselves), to give the live-chat
	# files more palatable names.
	# Either way, the filenames are expected to follow outputfilename='%(upload_date)s-%(title)s-%(id)s.%(ext)s'

	import os
	import sys
	import shutil
	import re

	def list_json():
	    """Return the names of all entries in the ``./json`` directory."""
	    # The body must be indented under the def; without it this is a syntax error.
	    return os.listdir("./json")

	def list_livechat():
	    """Return the names of all entries in the ``./livechat`` directory."""
	    # The body must be indented under the def; without it this is a syntax error.
	    return os.listdir("./livechat")

	def get_id_dict(json_list):
	    """Map each video id to its "<date>-<title>" prefix.

	    Args:
	        json_list: iterable of metadata filenames; each one is parsed with
	            ``split_name``.

	    Returns:
	        dict mapping the video id to the first component returned by
	        ``split_name``. Filenames that ``split_name`` rejects are reported on
	        stderr and left out.
	    """
	    mapping = {}
	    for name in json_list:
	        try:
	            title, vid = split_name(name)
	        except ValueError:
	            print(f"{name} is not for a video or stream. Skipping.", file=sys.stderr)
	            # Skip the assignment below: without this, the previous entry's
	            # title/vid would be stored again (or be unbound on the first file).
	            continue
	        mapping[vid] = title
	    return mapping

	def split_name(filename):
	    """Split a metadata filename into its "<date>-<title>" prefix and video id.

	    Returns:
	        A ``(prefix, video_id)`` tuple.

	    Raises:
	        ValueError: if the filename does not match the expected pattern.
	    """
	    # Will need updating if YouTube ever adds another character to the video id
	    m = re.match(r"(\d+-.+?)-(.{11})\.info\.json", filename)
	    if not m:
	        raise ValueError("Filename doesn't match")
	    return m.group(1), m.group(2)

	def livechat_get_id(livechat_name):
	    """Return the filename with its final ``.``-separated component removed.

	    A name without any dot yields an empty string.
	    """
	    # very fragile but should work?
	    return '.'.join(livechat_name.split(".")[:-1])

	def rename_files(livechat, mapping):
	    """Copy live-chat files into ``new_livechat/`` under descriptive names.

	    Args:
	        livechat: iterable of filenames located in ``livechat/``.
	        mapping: dict from video id to the filename prefix to use.

	    Each file is copied to ``new_livechat/<prefix>-<id>.livechat.json``; the
	    originals are left untouched. Files whose id is missing from ``mapping``
	    are reported on stderr and skipped instead of aborting the whole run.
	    """
	    # copyfile does not create missing parent directories.
	    os.makedirs("new_livechat", exist_ok=True)
	    for i in livechat:
	        vid = livechat_get_id(i)
	        if vid not in mapping:
	            print(f"No metadata found for {i}. Skipping.", file=sys.stderr)
	            continue
	        new_filename = f"{mapping[vid]}-{vid}.livechat.json"
	        src = f"livechat/{i}"
	        target = f"new_livechat/{new_filename}"
	        print(f"Copying {src} to {target}")
	        shutil.copyfile(src, target)

	if __name__ == "__main__":
	    # Read both directory listings first, then derive the id -> prefix table.
	    json_files = list_json()
	    livechat_files = list_livechat()
	    mapping = get_id_dict(json_files)

	    # Show the table before any file is copied.
	    print(mapping)

	    rename_files(livechat_files, mapping)