Last active
November 24, 2022 05:26
-
-
Save StarkGang/ad3c71f70097206fc81c0fd7958fe367 to your computer and use it in GitHub Desktop.
Scheduler-based multithreaded youtube_dl downloader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import contextlib
import datetime
import http.client as httplib
import logging
import multiprocessing
import os
import sys
import threading
import time
import traceback
from concurrent.futures import as_completed
from concurrent.futures.thread import ThreadPoolExecutor

import youtube_dl
from apscheduler.schedulers.background import BlockingScheduler
from dateutil.tz import tzlocal
class Helper:
    """Parsing helpers for the answers typed at the interactive prompts."""

    def __init__(self) -> None:
        pass

    def try_returning_a_int(self, arg: str):
        """Return ``int(arg)`` when it parses, otherwise ``str(arg).lower()``."""
        try:
            return int(arg)
        except (ValueError, TypeError):
            # TypeError covers non-numeric objects such as None.
            return str(arg).lower()

    def strip_time_from_input(self, arg: str):
        """Parse a ``HH:MM:AM/PM`` string into ``(hour, minute)`` in 24h form.

        Surrounding whitespace is ignored. Returns ``(None, None)`` when the
        input is not a string or does not match the format. Note that a valid
        result may contain ``0`` (e.g. ``12:00:AM`` -> ``(0, 0)``), so callers
        must test ``hour is None`` rather than truthiness.
        """
        if not isinstance(arg, str):
            return None, None
        try:
            dt_obj = datetime.datetime.strptime(arg.strip(), '%I:%M:%p')
        except ValueError:
            return None, None
        return dt_obj.hour, dt_obj.minute

    def is_interactive_shell(self):
        """Return True when the process' original stdin is attached to a TTY."""
        # sys.__stdin__ can be None (e.g. GUI launchers without a console).
        stdin = sys.__stdin__
        return stdin is not None and stdin.isatty()

    def give_out_bool(self, decision):
        """Interpret a yes/no style answer; anything unrecognised is False."""
        if isinstance(decision, str):
            # Tolerate stray spaces/newlines around what the user typed.
            decision = decision.strip()
        digit_as_dec = self.try_returning_a_int(decision)
        return digit_as_dec in [1, 'yes', 'ya', 'yeh', 'yep', 'y', 'ok', 'haan']
class Downloader:
    """Download every URL listed in a text file concurrently with youtube_dl.

    URLs are read from ``./urls.txt`` (one per line), each is handed to a
    thread pool, and a URL is removed from the file once it has downloaded.
    """

    def __init__(self) -> None:
        self.urls = []
        self.task_len = 0
        self.GOOGLE_DNS = "8.8.8.8"
        self.executor = ThreadPoolExecutor(max_workers=multiprocessing.cpu_count() * 2)
        # download() runs on pool threads; this guards the shared counter and
        # the read-modify-write of the URL file.
        self._lock = threading.Lock()

    def logging_setup(self):
        """Configure root logging to both ``rogue-dl.log`` and the console."""
        logging.basicConfig(
            level=logging.INFO,
            datefmt="[%d/%m/%Y %H:%M:%S]",
            format="%(asctime)s - [Rogue-Downloader] >> %(levelname)s << %(message)s",
            handlers=[logging.FileHandler("rogue-dl.log"), logging.StreamHandler()],
        )
        logging.getLogger("apscheduler").setLevel(logging.WARNING)

    @staticmethod
    def log(
        message: str = None,
        level=logging.INFO,
        logger: logging.Logger = logging.getLogger(__module__),
    ) -> str:
        """Log ``message`` (or the current traceback when it is empty) and return it."""
        text = message or traceback.format_exc()
        logger.log(level, text)
        return text

    def rem_link(self, link, file_path: str = './urls.txt'):
        """Remove the first line equal to ``link`` from ``file_path``.

        Lines are compared with surrounding whitespace stripped, because the
        URLs held in memory have lost their trailing newline. The file is only
        rewritten when a matching line was found.
        """
        target = link.strip()
        with self._lock:
            with open(file_path, 'r') as readable:
                content = readable.readlines()
            for index, line in enumerate(content):
                if line.strip() == target:
                    del content[index]
                    break
            else:
                return
            with open(file_path, "w") as writeable:
                writeable.writelines(content)

    def read_file_return_list(self, file_path: str = './urls.txt'):
        """Load the non-blank lines of ``file_path`` into ``self.urls`` and return them."""
        with open(file_path) as file_obj:
            stripped = (line.strip() for line in file_obj)
            self.urls = [url for url in stripped if url]
        return self.urls

    def downloader(self):
        """Wait for connectivity, then download every URL from the file in parallel."""
        self.log('Checking For Data Connection...')
        # Only report/sleep when the check actually failed.
        while not self.data_check():
            self.log('Data Connection Not Found.. Sleeping for 5s and retrying..')
            time.sleep(5)
        self.log("Connection initiated... Starting Program.")
        self.log(f'Function Called at : {time.time()}')
        time_st = time.perf_counter()
        logging.info('Fetching URL(s) from the file and storing in :memory: \n')
        self.read_file_return_list()
        logging.info(f'Above Task | [Completed in {round(time.perf_counter() - time_st)}s] \n')
        logging.info('Begining new download process in few seconds :\n ')
        futures = [self.executor.submit(self.download, url) for url in self.urls]
        for future in as_completed(futures):
            if future.result():
                self.log(f'Download Success : #{self.task_len}')
            else:
                self.log(f'Download Failed : #{self.task_len}')
        logging.info(f'Above Task | [Completed in {round(time.perf_counter() - time_st)}s] \n')

    def download(self, url):
        """Download ``url`` with up to five attempts.

        Returns True after the first successful ``extract_info`` call and
        False once all attempts have raised. A failure to update the URL file
        is logged but does not trigger another download of the same video.
        """
        opts = {
            'format': 'best',
            'addmetadata': True,
            'key': 'FFmpegMetadata',
            'prefer_ffmpeg': True,
            'geo_bypass': True,
            'nocheckcertificate': True,
            'outtmpl': './%(playlist_title)s/%(title)s.mp4',
            'postprocessors': [{
                'key': 'FFmpegVideoConvertor',
                'preferedformat': 'mp4'
            }],
            'logtostderr': False,
        }
        with self._lock:
            self.task_len += 1
        client = youtube_dl.YoutubeDL(opts)
        error_c = 0
        while error_c <= 4:
            try:
                ei = client.extract_info(url)
            except Exception as e:
                error_c += 1
                self.log(f'#{error_c} - An error was raised : {url} \nException : {e}')
                continue
            # Guard against a None result so logging cannot fail and loop forever.
            info = ei or {}
            self.log(f"Downloaded : {info.get('title')} from {info.get('uploader')} with url {url}")
            try:
                self.rem_link(url)
            except OSError as e:
                self.log(f'Could not remove {url} from urls.txt : {e}', level=logging.WARNING)
            return True
        return False

    def data_check(self):
        """Return True when a HEAD request to ``self.GOOGLE_DNS`` can be sent."""
        init_connection = httplib.HTTPSConnection(self.GOOGLE_DNS, timeout=5)
        try:
            init_connection.request("HEAD", "/")
            return True
        except Exception:
            return False
        finally:
            init_connection.close()

    def shut_down(self, scheduler: "BlockingScheduler"):
        """Cancel all scheduled jobs, stop the scheduler and kill the process."""
        scheduler.remove_all_jobs()
        scheduler.shutdown(False)
        # The exit time is chosen by the user, so do not claim a fixed hour.
        logging.warning('Scheduled exit time reached : Shutting Down all threads forcefully..')
        os._exit(0)  # exit all threads
def _ask_for_time(prompt, give_up_message, max_tries=5):
    """Prompt for a ``HH:MM:AM/PM`` time until one parses or tries run out.

    Returns ``(hour, minute)`` on success and ``(None, None)`` after
    ``max_tries`` invalid answers, logging ``give_up_message`` in that case.
    """
    for attempt in range(max_tries):
        if attempt > 0:
            dl_class.log('Try again. Please Give Valid Input.')
        hour, minute = helper_class.strip_time_from_input(input(prompt).strip())
        # Compare against None: hour 0 (12 AM) is a valid answer.
        if hour is not None:
            return hour, minute
    dl_class.log(give_up_message)
    return None, None


dl_class = Downloader()
dl_class.logging_setup()
helper_class = Helper()
scheduler = BlockingScheduler(timezone=tzlocal())

if helper_class.is_interactive_shell() and helper_class.give_out_bool(
    input('Do you wish to schedule this or run now? (Y/n) :\n')
):
    hour, minute = _ask_for_time(
        "Alright give me time input in the format : HH:MM:AM/PM. Example : 12:00:AM :\n",
        'Alright, Max input recived.. Try again later! Byee...',
    )
    # Without a start time there is nothing to schedule, so do not block in
    # scheduler.start() with an empty job list.
    if hour is not None:
        scheduler.add_job(dl_class.downloader, trigger="cron", hour=hour, minute=minute)
        should_exit_script = helper_class.give_out_bool(input("Should exit the process at specficied time? Don't worry you can always resume download process... (y/n) : \n"))
        if should_exit_script:
            exit_hour, exit_minute = _ask_for_time(
                "Alright give me time input in the format : HH:MM:AM/PM. Example : 12:00:AM \n:",
                'Alright, Max input recived.. Not scheduling a exit..',
            )
            if exit_hour is not None:
                scheduler.add_job(dl_class.shut_down, args=[scheduler], trigger="cron", hour=exit_hour, minute=exit_minute)
        logging.info('Script has been loaded all tasks has been scheduled respectively!')
        scheduler.start()
else:
    # Non-interactive run, or the user chose "run now".
    dl_class.downloader()
logging.info("All Tasks has been Terminated / completed.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Also, make a urls.txt and add your YouTube URLs to the file. You can even run it in Termux. I made this script because I use V! and they provide unlimited free data from 12 to 6 AM. I schedule all my lectures using this script, and by morning everything has been downloaded. Do let me know if I can improve it.