Skip to content

Instantly share code, notes, and snippets.

@miketahani
Created January 24, 2018 20:16
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save miketahani/08dc408ad9c3b142fc335bd13a13e411 to your computer and use it in GitHub Desktop.
Save miketahani/08dc408ad9c3b142fc335bd13a13e411 to your computer and use it in GitHub Desktop.
egghead.io series scraper
#!/usr/local/bin/python3
# one-off scraper for egghead.io videos in a series
# usage: chmod +x grabber.py && ./grabber.py <series stub>
# requires youtube-dl (`brew install youtube-dl`)
import os
import subprocess
import json
import argparse
from urllib.request import urlretrieve
# TODO refactor to allow user to download series (playlist of videos),
# lesson (single video), or whatever else there is on the site
def _load_manifest(series_stub, output_dir):
    """Return the parsed lesson manifest for a series, downloading it if absent.

    The manifest is cached as ``<output_dir>/<series_stub>.manifest.json`` and
    reused on later runs instead of being fetched again.

    Args:
        series_stub: course/series stub used to build the API URL and the
            cache filename.
        output_dir: existing directory in which the manifest is cached.

    Returns:
        The decoded JSON document (the code below iterates it as a list of
        lesson dicts).

    Raises:
        urllib.error.URLError: if the manifest cannot be downloaded.
        json.JSONDecodeError: if the cached manifest is not valid JSON.
    """
    manifest_url = 'https://egghead.io/api/v1/series/{}/lessons'.format(series_stub)
    local_manifest_filename = '{}/{}.manifest.json'.format(output_dir, series_stub)
    if not os.path.exists(local_manifest_filename):
        # download under a temporary name and rename afterwards, so an
        # interrupted transfer never leaves a truncated manifest behind that
        # every later run would then fail to parse
        partial_filename = local_manifest_filename + '.part'
        urlretrieve(manifest_url, partial_filename)
        os.replace(partial_filename, local_manifest_filename)
    # JSON is UTF-8 on the wire; don't depend on the locale's default encoding
    with open(local_manifest_filename, 'r', encoding='utf-8') as manifest_file:
        return json.load(manifest_file)


def _download_video(position, total, video, output_dir):
    """Download one lesson with youtube-dl and report whether it succeeded.

    Args:
        position: 1-based position of the video in the series; used as the
            zero-padded filename prefix and in the log line.
        total: total number of videos, for the log line only.
        video: manifest entry; its ``slug`` and ``http_url`` keys are read.
        output_dir: directory the video file is written to.

    Returns:
        True if the youtube-dl download exited with status 0, else False.

    Raises:
        subprocess.CalledProcessError: if ``youtube-dl --get-filename`` fails.
        FileNotFoundError: if the ``youtube-dl`` executable is not on PATH.
    """
    video_metadata = {
        'id': position,
        'slug': video['slug'],
        'url': video['http_url'],
    }
    # path template for youtube-dl; %(ext)s is left for youtube-dl to fill in
    path_template = '{output_dir}/{id:03d}.{slug}.%(ext)s'.format(output_dir=output_dir, **video_metadata)
    # ask youtube-dl for the full local file path without downloading anything
    local_video_path = subprocess.check_output(['youtube-dl', '--get-filename', '-o', path_template, video_metadata['url'], '--restrict-filenames'])
    # convert path from bytes to utf-8 string, strip trailing subprocess.check_output "\n"
    local_video_path = local_video_path.decode('utf-8').strip()
    print('\n[+] video ({id} of {total}): {slug}'.format(total=total, **video_metadata))
    # get the video with youtube-dl; --no-overwrites/--continue make re-runs resumable
    result = subprocess.run(['youtube-dl', '--no-overwrites', '--continue', '-o', local_video_path, video_metadata['url']])
    if result.returncode != 0:
        # keep going with the remaining videos, but don't hide the failure
        print('[!] youtube-dl exited with status {} for {}'.format(result.returncode, video_metadata['slug']))
        return False
    return True


def main(argv=None):
    """Download every video of an egghead.io series into ``./<series_stub>/``.

    Args:
        argv: command-line arguments without the program name; ``None`` makes
            argparse read ``sys.argv``.

    Returns:
        Process exit status: 0 if every download succeeded, 1 otherwise.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('series_stub', help='course/series stub (https://egghead.io/courses/<series_stub>)', type=str)
    args = parser.parse_args(argv)
    # series_stub = 'building-react-applications-with-idiomatic-redux'
    series_stub = args.series_stub
    output_dir = series_stub
    # exist_ok avoids the check-then-create race of a separate os.path.exists test
    os.makedirs(output_dir, exist_ok=True)
    manifest = _load_manifest(series_stub, output_dir)
    # sort manifest by ids ascending so we grab the videos in order
    # (presumably ids follow lesson order -- verify against the API)
    manifest = sorted(manifest, key=lambda vid: int(vid['id']))
    # get the total number of videos for the log output
    num_videos = len(manifest)
    failures = 0
    for position, video in enumerate(manifest, start=1):
        if not _download_video(position, num_videos, video, output_dir):
            failures += 1
    print('\ndone. have a nice day!')
    return 1 if failures else 0


if __name__ == '__main__':
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment