Skip to content

Instantly share code, notes, and snippets.

@CTimmerman
Last active September 19, 2021 03:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save CTimmerman/3ede451ca2b30ce551a6a977e2a7c0ae to your computer and use it in GitHub Desktop.
Save CTimmerman/3ede451ca2b30ce551a6a977e2a7c0ae to your computer and use it in GitHub Desktop.
YouTube Stream Downloader
"""YouTube Stream Downloader
2021-08-16 v1.0 by Cees Timmerman
2021-09-18 v1.1 Better error handling. Revert to XML captions as conversion to SRT is broken in pytube 11.0.1 for https://www.youtube.com/watch?v=AOZw1tgD8dA
"""
import logging, os, re
import pytube
from pytube.cli import on_progress
# Logging is the least awkward way to show where any error or exception came
# from; the traceback module only covered exceptions.
# The level comes from the YT_DL_LOGLEVEL environment variable (default INFO).
logging.basicConfig(
    level=os.environ.get("YT_DL_LOGLEVEL", "INFO"),
    format='%(levelname)s:%(message)s',
)
log = logging.getLogger()
def fix_filename(name, max_length=255):
    """
    Return name cut to max_length characters with unsafe parts replaced by "_".

    Replace invalid characters on Linux/Windows/MacOS with underscores.
    List from https://stackoverflow.com/a/31976060/819417
    Trailing spaces & periods are ignored on Windows, so a single leading
    whitespace character and a single trailing whitespace character or period
    are replaced too. A reserved device name (AUX, COM1-COM9, CON, LPT1-LPT9,
    NUL, PRN; any letter case) at the start of the name is replaced when it is
    the whole name or is directly followed by a period.

    Truncation happens before substitution, so the result is never longer
    than max_length characters.

    >>> fix_filename("  COM1  ")
    '_ COM1 _'
    >>> fix_filename(" COM1 ")
    '_COM1_'
    >>> fix_filename("COM10")
    'COM10'
    >>> fix_filename("COM1,")
    'COM1,'
    >>> fix_filename("COM1.txt")
    '_.txt'
    >>> all('_' == fix_filename(chr(i)) for i in range(32))
    True
    """
    # The (?![^.]) lookahead means "next is a period or the end of the name"
    # without consuming the period, so "COM1.txt" keeps its ".txt" part.
    # An earlier variant matched (\.|$) instead and swallowed that period.
    return re.sub(
        r'[/\\:|<>"?*\0-\x1f]|^(AUX|COM[1-9]|CON|LPT[1-9]|NUL|PRN)(?![^.])|^\s|[\s.]$',
        "_",
        name[:max_length],
        flags=re.IGNORECASE,
    )
def download_youtube_stream(url, folder='output-folder'):
    """Interactively download one stream of a YouTube video, plus English captions.

    Prints the video title and a numbered menu of the streams that carry both
    an audio and a video track, asks on stdin which one to fetch, and downloads
    it into *folder* (called with skip_existing=True). If the video has an
    'en' caption track, it is downloaded into the same folder in XML form.

    Args:
        url: Address of the YouTube video.
        folder: Directory that receives the downloaded files.

    Raises:
        ValueError: If the answer to the prompt is not an integer.
        IndexError: If the chosen number is not between 1 and the number of
            streams.
    """
    youtube = pytube.YouTube(url, on_progress_callback=on_progress)
    print(youtube.title)
    # Public attributes seen in a dir() dump of the YouTube object:
    #   age_restricted, allow_oauth_cache, author, bypass_age_gate,
    #   caption_tracks, captions, channel_id, channel_url, check_availability,
    #   description, embed_html, embed_url, fmt_streams, initial_data, js,
    #   js_url, keywords, length, metadata, publish_date, rating,
    #   register_on_complete_callback, register_on_progress_callback,
    #   stream_monostate, streaming_data, streams, thumbnail_url, title,
    #   use_oauth, vid_info, video_id, views, watch_html, watch_url

    streams = youtube.streams
    # Public attributes seen in a dir() dump of (apparently) a single stream:
    #   abr, audio_codec, bitrate, codecs, default_filename, download,
    #   exists_at_path, expiration, filesize, filesize_approx, get_file_path,
    #   includes_audio_track, includes_video_track, is_3d, is_adaptive,
    #   is_dash, is_hdr, is_live, is_otf, is_progressive, itag, mime_type,
    #   on_complete, on_progress, parse_codecs, resolution, stream_to_buffer,
    #   subtype, title, type, url, video_codec

    # Only streams with both tracks are listed, but the number shown is the
    # 1-based position in the full stream list, so the menu may have gaps.
    for i, s in enumerate(streams):
        if s.includes_audio_track and s.includes_video_track:
            print(f"{i + 1:>2}: {s.resolution:>5} {s.bitrate / 1000:>4,.0f}kbps {s.abr:>7} {s.mime_type:<10} {str(s.codecs):<28} {s.filesize / 1000_000:>7,.2f} MB {s.filesize / 1024 / 1024:>7,.2f} MiB")

    choice = int(input("Which stream? ")) - 1
    # Reject 0 and negative answers explicitly: used as a plain index they
    # would wrap around and silently select a stream from the end of the list.
    if not 0 <= choice < len(streams):
        raise IndexError(f"Stream number must be between 1 and {len(streams)}, got {choice + 1}.")
    stream = streams[choice]

    print("Downloading...")
    stream.download(folder, skip_existing=True)

    if 'en' in youtube.captions:
        caption = youtube.captions['en']
        print(caption)
        # xml->srt broken in pytube 11.0.1 on https://www.youtube.com/watch?v=AOZw1tgD8dA
        caption.download(title=fix_filename(youtube.title), output_path=folder, srt=False)

    # Report the stream that was actually chosen (not the last one the menu
    # loop visited), joined with the platform's own path separator.
    print(f"\nDone. {os.path.join(folder, stream.default_filename)}")
if __name__ == "__main__":
    # Script entry point: ask for a video address, run the downloader, and log
    # any failure together with its traceback instead of crashing.
    try:
        # Python 2 is dangerous (use raw_input) but unsupported.
        video_url = input("YouTube URL: ")
        download_youtube_stream(video_url)
    except Exception as err:
        log.exception(err)

    # Keep console open until user presses enter.
    input()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment