Skip to content

Instantly share code, notes, and snippets.

@khanh101
Last active December 20, 2023 06:43
Show Gist options
  • Save khanh101/3b91ef9866c3940e84d7904aae3005d9 to your computer and use it in GitHub Desktop.
Save khanh101/3b91ef9866c3940e84d7904aae3005d9 to your computer and use it in GitHub Desktop.
#!/usr/bin/env bash
# Download the audio track of every video in a YouTube playlist with yt-dlp.
#
# Files already listed in "$dir/playlist.txt" (the download archive) are
# skipped, so the script can be re-run to fetch only new playlist entries.

# Use $HOME rather than "~": a tilde inside double quotes is NOT expanded by
# bash, so the original value produced a literal "./~/playlist/" directory.
dir="$HOME/playlist"
link="<url to playlist>"

# "$link" is quoted so that characters such as "?" and "&" in the URL are not
# subject to globbing or word splitting.
yt-dlp \
    --download-archive "$dir/playlist.txt" \
    --ignore-errors \
    --geo-bypass \
    --output "$dir/%(title)s.%(id)s.%(ext)s" \
    --no-overwrites \
    --extract-audio --audio-format "aac" --audio-quality 0 \
    --no-post-overwrites \
    "$link"
#!/usr/bin/env python
import concurrent.futures
import os
import sys
import threading

import yt_dlp
# File that receives the "# DONE" / "# REMAINING" listing of playlist
# directories written by download_youtube.
STATE_PATH = "/home/khanh/vault/large/youtube/download_youtube.txt"
# makedirs(exist_ok=True) also creates missing parent directories and does not
# fail if the directory appears between a check and the creation, unlike the
# previous exists()/mkdir() pair.
os.makedirs(os.path.dirname(STATE_PATH), exist_ok=True)

# Directory under which one sub-directory per playlist title is created.
ROOT_DIR = "/home/khanh/vault/large/youtube"

# Playlists to download.
DOWNLOAD_LIST = [
    "https://www.youtube.com/playlist?list=PL5octn-l9KEX31HGPCZRqUt_a0mJcYN-a",
    "https://www.youtube.com/playlist?list=PLDdlwI0EwS7TpHbfjMCjvm62n9WA-3Z-l",
]
# Drop duplicate URLs while keeping the order in which they were listed
# (a set would scramble it from run to run).
DOWNLOAD_LIST = list(dict.fromkeys(DOWNLOAD_LIST))

# One worker thread per playlist. ThreadPoolExecutor raises ValueError for
# max_workers <= 0, so keep at least one worker when the list is empty.
MAX_WORKERS = max(1, len(DOWNLOAD_LIST))

# yt-dlp output template for downloaded files; get_id_from_path relies on
# this exact "<title>.<id>.<ext>" layout.
TEMPLATE = "%(title)s.%(id)s.%(ext)s"
def get_id_from_path(path: str) -> str:
    """Return the video-id component of a file named ``<title>.<id>.<ext>``.

    Args:
        path: File name or path; only its base name is inspected.

    Returns:
        The second-to-last dot-separated component of the base name, or an
        empty string when the name contains no dot at all (previously such
        names raised ``IndexError``).
    """
    # The parsing below hard-codes the layout of TEMPLATE; fail loudly if the
    # template is ever changed without updating this function.
    assert TEMPLATE == "%(title)s.%(id)s.%(ext)s"
    name = os.path.basename(path)
    # Split from the right: the title may itself contain dots, but only the
    # last two separators matter.
    parts = name.rsplit(".", 2)
    if len(parts) < 2:
        return ""
    return parts[-2]
class Logger:
    """Minimal logger object exposing ``debug``, ``warning`` and ``error``.

    Debug and warning messages are written to standard output, error messages
    to standard error; each message is followed by a newline.
    """

    @staticmethod
    def _emit(stream, text):
        # Single write per message, newline appended.
        stream.write(text + "\n")

    def debug(self, msg: str):
        """Write *msg* to standard output."""
        self._emit(sys.stdout, msg)

    def warning(self, msg: str):
        """Write *msg* to standard output."""
        self._emit(sys.stdout, msg)

    def error(self, msg: str):
        """Write *msg* to standard error."""
        self._emit(sys.stderr, msg)
# Logger instance handed to every YoutubeDL object through the "logger" option.
logger = Logger()

# Playlist directories whose download has finished / is currently in progress.
done = []
remaining = []

# download_youtube is submitted to a thread pool, so several threads update
# done/remaining and rewrite STATE_PATH; this lock serialises those steps.
_state_lock = threading.Lock()


def _write_state() -> None:
    """Rewrite STATE_PATH from ``done`` and ``remaining``.

    The caller must hold ``_state_lock``.
    """
    lines = ["# DONE\n"]
    lines.extend(path + "\n" for path in sorted(done))
    lines.append("# REMAINING\n")
    lines.extend(path + "\n" for path in sorted(remaining))
    # Explicit UTF-8: playlist titles (and thus paths) may be non-ASCII.
    with open(STATE_PATH, "w", encoding="utf-8") as f:
        f.writelines(lines)


def download_youtube(root_dir: str, url: str):
    """Download the not-yet-present videos of one playlist.

    The playlist metadata is fetched first; videos are saved into
    ``<root_dir>/<playlist title>/`` using TEMPLATE. A video is skipped when a
    file whose name carries its id already exists in that directory. The
    playlist directory is recorded in STATE_PATH as REMAINING before the
    download starts and as DONE after it returns.

    Args:
        root_dir: Directory under which the playlist directory is created.
        url: Playlist URL.
    """
    ydl_opts = {
        "geo_bypass": True,
        "logger": logger,
        "verbose": True,
        "ignoreerrors": True,
    }
    id_to_url = {}
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, process=False, download=False)
        # With "ignoreerrors" set, a failed extraction is reported through the
        # logger and None is returned instead of raising.
        if info is None:
            logger.error(f"could not extract playlist info: {url}")
            return
        target_dir = os.path.join(root_dir, info["title"])
        # Consume the entries while the YoutubeDL object is still open.
        for entry in info["entries"]:
            id_to_url[entry["id"]] = entry["url"]

    os.makedirs(target_dir, exist_ok=True)
    # Ids of videos that already have a file in the playlist directory.
    skip_ids = {get_id_from_path(name) for name in os.listdir(target_dir)}
    pending_urls = [
        video_url
        for video_id, video_url in id_to_url.items()
        if video_id not in skip_ids
    ]

    with _state_lock:
        remaining.append(target_dir)
        _write_state()

    ydl_opts = {
        "outtmpl": os.path.join(target_dir, TEMPLATE),
        "geo_bypass": True,
        "logger": logger,
        "verbose": True,
        "ignoreerrors": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download(pending_urls)

    with _state_lock:
        remaining.remove(target_dir)
        done.append(target_dir)
        _write_state()
def main():
    """Run download_youtube for every URL in DOWNLOAD_LIST on a thread pool.

    Each playlist is submitted as its own task. An exception raised by a task
    is printed and does not stop the remaining tasks from being collected.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:
        pending = []
        for playlist_url in DOWNLOAD_LIST:
            pending.append(pool.submit(download_youtube, ROOT_DIR, playlist_url))
        # Collect results in completion order so failures surface promptly.
        for finished in concurrent.futures.as_completed(pending):
            try:
                finished.result()
            except Exception as e:
                print(f"exception: {e}")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment