Skip to content

Instantly share code, notes, and snippets.

@StarkGang
Last active November 24, 2022 05:26
Show Gist options
  • Save StarkGang/ad3c71f70097206fc81c0fd7958fe367 to your computer and use it in GitHub Desktop.
Save StarkGang/ad3c71f70097206fc81c0fd7958fe367 to your computer and use it in GitHub Desktop.
Scheduler based multithreaded youtube_dl downloader
import contextlib
import datetime
import http.client as httplib
import logging
import multiprocessing
import os
import sys
import threading
import time
import traceback
from concurrent.futures import as_completed
from concurrent.futures.thread import ThreadPoolExecutor

import youtube_dl
from apscheduler.schedulers.background import BlockingScheduler
from dateutil.tz import tzlocal
class Helper:
    """Small parsing helpers for the interactive prompts of the script."""

    # Answers (after lower-casing) that count as "yes"; the integer 1 is
    # accepted as well.
    _AFFIRMATIVE = (1, 'yes', 'ya', 'yeh', 'yep', 'y', 'ok', 'haan')

    def __init__(self) -> None:
        pass

    def try_returning_a_int(self, arg: str):
        """Return ``int(arg)`` when possible, else ``str(arg).lower()``."""
        try:
            return int(arg)
        except ValueError:
            return str(arg).lower()

    def strip_time_from_input(self, arg: str):
        """Parse a ``HH:MM:AM/PM`` string into a 24-hour ``(hour, minute)``.

        Surrounding whitespace is ignored. Returns ``(None, None)`` when the
        input cannot be parsed. Note that a valid midnight input yields
        ``hour == 0``, so callers must compare against ``None`` rather than
        rely on truthiness.
        """
        if isinstance(arg, str):
            arg = arg.strip()
        try:
            dt_obj = datetime.datetime.strptime(arg, '%I:%M:%p')
        except (TypeError, ValueError):
            # TypeError: non-string input; ValueError: wrong format.
            return None, None
        return dt_obj.hour, dt_obj.minute

    def is_interactive_shell(self):
        """Return True when the process' original stdin is a terminal."""
        stream = sys.__stdin__
        if stream is None:
            # No console attached (e.g. GUI launchers): cannot prompt.
            return False
        try:
            return stream.isatty()
        except ValueError:
            # isatty() raises ValueError on a closed stream.
            return False

    def give_out_bool(self, decision):
        """Interpret a user answer as a yes/no decision.

        Returns True for ``1`` or one of the affirmative words (matched
        case-insensitively, ignoring surrounding whitespace); False for
        anything else, including an empty answer.
        """
        if isinstance(decision, str):
            decision = decision.strip()
        return self.try_returning_a_int(decision) in self._AFFIRMATIVE
class Downloader:
    """Download every URL listed in a text file through a thread pool.

    URLs are read from a plain-text file (one per line), each one is handed
    to ``youtube_dl`` on a worker thread, and a URL is removed from the file
    once its download succeeded so that a later run only retries what is
    still pending.
    """

    # Number of times a single URL is attempted before it is reported failed.
    MAX_ATTEMPTS = 5

    def __init__(self) -> None:
        self.urls = []
        self.task_len = 0
        self.GOOGLE_DNS = "8.8.8.8"
        # File the pending URLs live in; updated by read_file_return_list().
        self.urls_file = './urls.txt'
        # Guards task_len and the read-modify-write cycle on the URL file,
        # both of which are touched from several worker threads.
        self._lock = threading.Lock()
        self.executor = ThreadPoolExecutor(max_workers=multiprocessing.cpu_count() * 2)

    def logging_setup(self):
        """Configure root logging to ``rogue-dl.log`` and the console."""
        logging.basicConfig(
            level=logging.INFO,
            datefmt="[%d/%m/%Y %H:%M:%S]",
            format="%(asctime)s - [Rogue-Downloader] >> %(levelname)s << %(message)s",
            handlers=[logging.FileHandler("rogue-dl.log"), logging.StreamHandler()],
        )
        # Keep the scheduler's own chatter out of the log.
        logging.getLogger("apscheduler").setLevel(logging.WARNING)

    @staticmethod
    def log(
        message: str = None,
        level=logging.INFO,
        logger: logging.Logger = logging.getLogger(__module__),
    ) -> str:
        """Log ``message`` and return the text that was logged.

        When ``message`` is empty the current traceback text is logged
        instead, which makes a bare ``log()`` useful inside ``except``.
        """
        text = message or traceback.format_exc()
        logger.log(level, text)
        return text

    def rem_link(self, link):
        """Remove the first line matching ``link`` from the URL file.

        Lines are compared with surrounding whitespace stripped (the stored
        lines still carry their newline, the URL passed in does not). The
        file is left untouched when the link is not listed.
        """
        target = link.strip()
        with self._lock:
            with open(self.urls_file, 'r') as readable:
                content = readable.readlines()
            for index, line in enumerate(content):
                if line.strip() == target:
                    del content[index]
                    break
            else:
                return
            with open(self.urls_file, "w") as writeable:
                writeable.writelines(content)

    def read_file_return_list(self, file_path: str = './urls.txt'):
        """Load the non-blank lines of ``file_path`` into ``self.urls``.

        The path is remembered so that ``rem_link`` edits the same file.
        A missing file is logged and treated as an empty URL list.
        """
        self.urls_file = file_path
        try:
            with open(file_path) as file_obj:
                stripped = (line.strip() for line in file_obj)
                self.urls = [url for url in stripped if url]
        except FileNotFoundError:
            self.log(f'URL file not found : {file_path}', level=logging.ERROR)
            self.urls = []

    def downloader(self):
        """Wait for connectivity, then download every URL from the file."""
        self.log('Checking For Data Connection...')
        # Only complain and sleep when the check actually failed.
        while not self.data_check():
            self.log('Data Connection Not Found.. Sleeping for 5s and retrying..')
            time.sleep(5)
        self.log("Connection initiated... Starting Program.")
        self.log(f'Function Called at : {time.time()}')
        time_st = time.perf_counter()
        logging.info('Fetching URL(s) from the file and storing in :memory: \n')
        self.read_file_return_list()
        logging.info(f'Above Task | [Completed in {round(time.perf_counter() - time_st)}s] \n')
        logging.info('Begining new download process in few seconds :\n ')
        futures = [self.executor.submit(self.download, url) for url in self.urls]
        # Number results in completion order; the shared task_len counter
        # changes while workers run and does not identify a finished task.
        for finished, future in enumerate(as_completed(futures), start=1):
            try:
                succeeded = future.result()
            except Exception:
                # A worker died unexpectedly: log its traceback, keep going.
                self.log(level=logging.ERROR)
                succeeded = False
            if succeeded:
                self.log(f'Download Success : #{finished}')
            else:
                self.log(f'Download Failed : #{finished}')
        logging.info(f'Above Task | [Completed in {round(time.perf_counter() - time_st)}s] \n')

    def download(self, url):
        """Download ``url``, retrying up to ``MAX_ATTEMPTS`` times.

        Returns True once an attempt succeeded (the URL is then removed from
        the URL file), False when every attempt raised.
        """
        # NOTE(review): 'addmetadata' and the top-level 'key' look like
        # CLI/post-processor settings rather than YoutubeDL options; confirm
        # whether they have any effect here.
        opts = {
            'format': 'best',
            'addmetadata': True,
            'key': 'FFmpegMetadata',
            'prefer_ffmpeg': True,
            'geo_bypass': True,
            'nocheckcertificate': True,
            'outtmpl': './%(playlist_title)s/%(title)s.mp4',
            'postprocessors': [{
                'key': 'FFmpegVideoConvertor',
                'preferedformat': 'mp4'
            }],
            'logtostderr': False,
        }
        with self._lock:
            self.task_len += 1
        client = youtube_dl.YoutubeDL(opts)
        for attempt in range(1, self.MAX_ATTEMPTS + 1):
            try:
                ei = client.extract_info(url)
            except Exception as e:
                self.log(f'#{attempt} - An error was raised : {url} \nException : {e}')
                continue
            # Guard against a None result so logging cannot fail.
            info = ei or {}
            self.log(f"Downloaded : {info.get('title')} from {info.get('uploader')} with url {url}")
            try:
                self.rem_link(url)
            except OSError as e:
                # The download itself worked; failing to update the list
                # must not trigger another download of the same URL.
                self.log(f'Could not remove {url} from {self.urls_file} : {e}', level=logging.WARNING)
            return True
        return False

    def data_check(self):
        """Return True when an HTTPS request to ``GOOGLE_DNS`` can be sent."""
        init_connection = httplib.HTTPSConnection(self.GOOGLE_DNS, timeout=5)
        try:
            init_connection.request("HEAD", "/")
        except Exception:
            # Any failure simply means "not connected yet".
            return False
        else:
            return True
        finally:
            init_connection.close()

    def shut_down(self, scheduler: "BlockingScheduler"):
        """Drop all scheduled jobs and terminate the whole process."""
        scheduler.remove_all_jobs()
        scheduler.shutdown(False)
        # NOTE(review): the message hard-codes "6 AM" although the exit time
        # is supplied by the caller's schedule.
        logging.warning('6 AM : Shutting Down all threads forcefully..')
        os._exit(0)  # exit all threads
def _ask_for_time(prompt, give_up_message, max_attempts=5):
    """Prompt for a ``HH:MM:AM/PM`` time until one parses.

    Returns ``(hour, minute)`` in 24-hour form, or ``None`` after
    ``max_attempts`` invalid inputs (``give_up_message`` is logged then).
    """
    for attempt in range(max_attempts):
        if attempt:
            dl_class.log('Try again. Please Give Valid Input.')
        hour, minute = helper_class.strip_time_from_input(input(prompt))
        # Compare against None: hour 0 (12 AM) is a valid answer.
        if hour is not None:
            return hour, minute
    dl_class.log(give_up_message)
    return None


dl_class = Downloader()
dl_class.logging_setup()
helper_class = Helper()
scheduler = BlockingScheduler(timezone=tzlocal())

# Download immediately unless the user explicitly asks for a schedule; a
# non-interactive run has nobody to ask, so it also runs right away.
run_now = True
if helper_class.is_interactive_shell():
    should_use_sch = helper_class.give_out_bool(input('Do you wish to schedule this or run now? (Y/n) :\n'))
    if should_use_sch:
        run_now = False
        start_at = _ask_for_time(
            "Alright give me time input in the format : HH:MM:AM/PM. Example : 12:00:AM :\n",
            'Alright, Max input recived.. Try again later! Byee...',
        )
        # Without a start time there is nothing to schedule, so the
        # blocking scheduler is not started at all.
        if start_at is not None:
            scheduler.add_job(dl_class.downloader, trigger="cron", hour=start_at[0], minute=start_at[1])
            should_exit_script = helper_class.give_out_bool(input("Should exit the process at specficied time? Don't worry you can always resume download process... (y/n) : \n"))
            if should_exit_script:
                stop_at = _ask_for_time(
                    "Alright give me time input in the format : HH:MM:AM/PM. Example : 12:00:AM \n:",
                    'Alright, Max input recived.. Not scheduling a exit..',
                )
                if stop_at is not None:
                    scheduler.add_job(dl_class.shut_down, args=[scheduler], trigger="cron", hour=stop_at[0], minute=stop_at[1])
            logging.info('Script has been loaded all tasks has been scheduled respectively!')
            scheduler.start()  # blocks until the scheduler is shut down
if run_now:
    dl_class.downloader()
logging.info("All Tasks has been Terminated / completed.")
@StarkGang
Copy link
Author

Bad code tbh; it's been a while since I worked with Python... Do you want one in Rust too?

@StarkGang
Copy link
Author

Also, make a urls.txt file and add your YouTube URLs to it. You can even run this in Termux. I made this script because I use V!, and they provide unlimited free data from 12 to 6 AM. I schedule all my lectures using this script, and by morning everything is downloaded. Do let me know if I can improve it.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment