Skip to content

Instantly share code, notes, and snippets.

@smly
Created January 22, 2016 04:09
Show Gist options
  • Save smly/01604dc64aa49eac9782 to your computer and use it in GitHub Desktop.
Save smly/01604dc64aa49eac9782 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import argparse
import pickle
import os
import re
from subprocess import Popen
import requests
from bs4 import BeautifulSoup
# URL templates for a playlist page and for a single video's watch page.
BASE_URL = "https://www.youtube.com/playlist?list={list_id:s}"
WATCH_URL = "https://www.youtube.com/watch?v={video_id:s}"
# Regex capturing the video id from a relative "/watch?v=<id>&..." href.
# Written as a raw string: "\?" and "\&" are invalid escape sequences in a
# normal string literal and trigger a warning on recent Python versions.
WATCH_LINK_FMT = r"/watch\?v=([^\&]+)"
# File in which the fetched playlist response is pickled between the
# download step and the parse step.
TEMP_PLAYLIST_FILE = "/tmp/temp_playlist_response.pkl"
def download_playlist(list_id):
    """Fetch the playlist page for ``list_id`` and cache the response on disk.

    The ``requests`` response object is pickled to ``TEMP_PLAYLIST_FILE``
    so that it can be read back later without another HTTP request.

    Args:
        list_id: Playlist identifier substituted into ``BASE_URL``.

    Raises:
        RuntimeError: If the server answers with a status other than 200.
    """
    response = requests.get(BASE_URL.format(list_id=list_id))
    if response.status_code != 200:
        raise RuntimeError("Something is wrong")
    # pickle writes bytes, so the file must be opened in binary mode;
    # text mode ('w') raises TypeError on Python 3.
    with open(TEMP_PLAYLIST_FILE, 'wb') as f:
        pickle.dump(response, f)
def parse_playlist():
    """Download every video linked from the cached playlist page.

    Loads the response pickled at ``TEMP_PLAYLIST_FILE``, collects the
    ``<a class="pl-video-title-link">`` elements whose ``href`` starts with
    ``/watch``, and runs ``youtube-dl`` once per video, waiting for each
    process to finish before starting the next.
    """
    # NOTE(security): pickle.load can execute arbitrary code from the file,
    # and TEMP_PLAYLIST_FILE is a fixed path under /tmp. This is only safe
    # when nothing else is able to write to that path.
    # Binary mode is required: pickle data is bytes, not text.
    with open(TEMP_PLAYLIST_FILE, 'rb') as f:
        obj = pickle.load(f)

    soup = BeautifulSoup(obj.text, "html.parser")
    playlist_video_links = soup.find_all("a", attrs={
        "class": "pl-video-title-link",
        "href": re.compile('^/watch'),
    })
    for link_elem in playlist_video_links:
        href_attr = link_elem.get('href')
        matched = re.match(WATCH_LINK_FMT, href_attr)
        if matched is None:
            # The href starts with "/watch" but carries no "?v=<id>" part;
            # skip it instead of crashing on ``None.group``.
            continue
        video_id = matched.group(1)
        # Popen.wait() blocks until youtube-dl exits, as os.waitpid did,
        # and lets the Popen object reap the child itself.
        Popen(["youtube-dl", WATCH_URL.format(video_id=video_id)]).wait()
if __name__ == '__main__':
    # Command-line entry point: the playlist id is a required option; the
    # playlist page is fetched first, then its videos are downloaded.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '--list', '-l',
        type=str,
        required=True,
    )
    cli_args = arg_parser.parse_args()
    download_playlist(cli_args.list)
    parse_playlist()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment