Skip to content

Instantly share code, notes, and snippets.

@martjanz
Last active September 29, 2020 06:21
Show Gist options
  • Save martjanz/f6092184e6c0cabf2b1c7a8bd8cf733e to your computer and use it in GitHub Desktop.
Save martjanz/f6092184e6c0cabf2b1c7a8bd8cf733e to your computer and use it in GitHub Desktop.
YouTube Channel Downloader
# YouTube Channel Downloader
#
# Download all videos from all user/channel playlists
#
# TODO: check pagination. Tested with up to 10 playlists and up to 50 videos each.
import json
import re
import time
import traceback
from urllib.request import urlopen
# External dependencies
# - jsonpath_rw
# - pytube3
# - requests
from jsonpath_rw import parse
from pytube import Playlist
from pytube import YouTube
import requests
# -- Parameters --
# Kind of media to fetch for every playlist entry: 'video' or 'audio'.
# Any other value is reported as unsupported and nothing is downloaded.
media_type = 'video' # or 'audio'
# Channel whose playlists are downloaded. get_channel_playlists() inserts
# this name into https://www.youtube.com/c/<channel_name>/playlists.
channel_name = 'ArchivoHistóricoRTA'
def get_channel_playlists(yt_username):
    """Return the playlists shown on a YouTube channel's "playlists" page.

    The page HTML is scraped: the JSON document assigned to
    ``window["ytInitialData"]`` is decoded and searched for every
    ``gridPlaylistRenderer`` entry.

    Args:
        yt_username: Channel name as it appears in
            ``https://www.youtube.com/c/<name>/playlists``.

    Returns:
        A list of dicts, one per playlist found, with the keys ``'id'``
        (the playlist id) and ``'title'`` (text of the first title run).

    Raises:
        requests.RequestException: If the page cannot be fetched, the
            request times out, or the server answers with an error status.
        ValueError: If the page does not contain the ``ytInitialData``
            assignment or its value is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    channel_playlists_url = 'https://www.youtube.com/c/{}/playlists'.format(yt_username)
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(channel_playlists_url, timeout=30)
    response.raise_for_status()

    # Get json metadata from HTML
    match = re.search(r"window\[\"ytInitialData\"] = ([^\n]+)", response.text)
    if match is None:
        raise ValueError(
            'Could not find "ytInitialData" metadata in {}'.format(channel_playlists_url)
        )

    # raw_decode() parses the JSON document at the start of the captured text
    # and ignores whatever follows it (e.g. the ";" ending the JS statement),
    # so no assumption is needed about how many trailing characters to strip.
    json_decoded, _ = json.JSONDecoder().raw_decode(match.group(1))

    playlist_query = parse('$..gridRenderer.items..gridPlaylistRenderer')
    return [
        {
            'id': found.value['playlistId'],
            'title': found.value['title']['runs'][0]['text'],
        }
        for found in playlist_query.find(json_decoded)
    ]
# -- Code --
def download_audio(url, path='.'):
    """Download the audio track of a YouTube video as an mp4 file.

    This is best-effort: any error raised while resolving or downloading
    the stream is printed as a traceback and swallowed, so one failing
    video does not abort a batch.

    Args:
        url: URL of the video to download.
        path: Directory handed to pytube's ``Stream.download`` as the
            output location. Defaults to the current directory.

    Returns:
        None.
    """
    print('Downloading audio from {}...'.format(url))
    try:
        # Use the ``url`` argument; relying on a module-level loop variable
        # would break as soon as the function is called from anywhere else.
        stream = YouTube(url) \
            .streams \
            .filter(only_audio=True, file_extension='mp4') \
            .first()
        if stream is None:
            # first() yields None for an empty query; say so explicitly
            # instead of failing with an unrelated-looking exception.
            print('No mp4 audio stream available for {}.'.format(url))
            return
        # Honour ``path`` so files land in the requested directory.
        stream.download(path)
    except Exception:
        # Deliberate catch-all: report the failure and carry on.
        traceback.print_exc()
def download_video(url, path='.'):
    """Download a YouTube video (with audio) as an mp4 file.

    Among the progressive mp4 streams, the first one after ordering by
    resolution in descending order is downloaded. This is best-effort: any
    error raised while resolving or downloading the stream is printed as a
    traceback and swallowed, so one failing video does not abort a batch.

    Args:
        url: URL of the video to download.
        path: Directory handed to pytube's ``Stream.download`` as the
            output location. Defaults to the current directory.

    Returns:
        None.
    """
    print('Downloading video from {}...'.format(url))
    try:
        stream = YouTube(url) \
            .streams \
            .filter(progressive=True, file_extension='mp4') \
            .order_by('resolution') \
            .desc() \
            .first()
        if stream is None:
            # first() yields None for an empty query; report that clearly
            # rather than tripping over ``None.download``.
            print('No progressive mp4 stream available for {}.'.format(url))
            return
        stream.download(path)
    except Exception:
        # Deliberate catch-all: report the failure and carry on.
        traceback.print_exc()
# -- Main --
# Map each supported media type to the function that downloads it.
downloaders = {
    'video': download_video,  # Download video (with audio)
    'audio': download_audio,  # Download audio only
}

if media_type not in downloaders:
    # Validate once, before any network access, instead of repeating the
    # message (and the throttle delay) for every single video.
    print('Media type not supported. Check "media_type" variable.')
    playlists = []
else:
    playlists = get_channel_playlists(channel_name)

for playlist in playlists:
    yt_playlist = Playlist('https://www.youtube.com/playlist?list={}'.format(playlist['id']))
    # Each playlist is saved into a directory named after its title.
    dest_path = playlist['title']
    # Read the URL list once and reuse it for both the count and the loop.
    video_urls = yt_playlist.video_urls
    print('Downloading {} {}s from playlist...'.format(len(video_urls), media_type))
    for video_url in video_urls:
        downloaders[media_type](video_url, dest_path)
        # Throttle to avoid YouTube restriction (Too many requests)
        time.sleep(3)
jsonpath_rw
pytube3
requests
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment