miketahani/grabber.py

## grabber.py
#!/usr/local/bin/python3
# one-off scraper for egghead.io videos in a series
# usage: chmod +x grabber.py && ./grabber.py <series stub>
# requires youtube-dl (`brew install youtube-dl`)
import os
import subprocess
import json
import argparse
from urllib.request import urlretrieve

# TODO refactor to allow user to download series (playlist of videos),
#      lesson (single video), or whatever else there is on the site


def _parse_args(argv=None):
    """Parse the command line.

    argv: list of argument strings, or None to let argparse read sys.argv[1:].
    Returns the argparse namespace; its `series_stub` attribute is the
    course/series stub given by the user.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('series_stub', help='course/series stub (https://egghead.io/courses/<series_stub>)', type=str)
    return parser.parse_args(argv)


def _load_manifest(series_stub, output_dir):
    """Return the parsed lesson manifest for a series, downloading it if needed.

    The manifest is cached as `<output_dir>/<series_stub>.manifest.json`; when
    that file already exists it is reused and nothing is fetched.
    `output_dir` is created if it is missing.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test
    os.makedirs(output_dir, exist_ok=True)

    manifest_url = 'https://egghead.io/api/v1/series/{}/lessons'.format(series_stub)
    local_manifest_filename = '{}/{}.manifest.json'.format(output_dir, series_stub)

    if not os.path.exists(local_manifest_filename):
        # download under a scratch name and rename into place, so an
        # interrupted download never leaves a truncated file that later runs
        # would mistake for a valid cached manifest
        partial_filename = local_manifest_filename + '.part'
        try:
            urlretrieve(manifest_url, partial_filename)
            os.replace(partial_filename, local_manifest_filename)
        finally:
            if os.path.exists(partial_filename):
                os.remove(partial_filename)

    # JSON is UTF-8 on the wire; don't depend on the locale's default encoding
    with open(local_manifest_filename, 'r', encoding='utf-8') as manifest_file:
        return json.load(manifest_file)


def _download_video(position, total, video, output_dir):
    """Download one lesson with youtube-dl and return youtube-dl's exit status.

    position: 1-based position of the lesson, used as the file name prefix
    total: number of lessons, used only for the progress line
    video: one manifest entry; its 'slug' and 'http_url' keys are read
    Raises subprocess.CalledProcessError if the file name lookup fails.
    """
    video_metadata = {
        'id': position,
        'slug': video['slug'],
        'url': video['http_url'],
    }
    # path template for youtube-dl
    path_template = '{output_dir}/{id:03d}.{slug}.%(ext)s'.format(output_dir=output_dir, **video_metadata)
    # figure out the full local file path
    local_video_path = subprocess.check_output(['youtube-dl', '--get-filename', '-o', path_template, video_metadata['url'], '--restrict-filenames'])
    # convert path from bytes to utf-8 string, strip trailing subprocess.check_output "\n"
    local_video_path = local_video_path.decode('utf-8').strip()

    print('\n[+] video ({id} of {total}): {slug}'.format(total=total, **video_metadata))

    # get the video with youtube-dl
    completed = subprocess.run(['youtube-dl', '--no-overwrites', '--continue', '-o', local_video_path, video_metadata['url']])
    return completed.returncode


def main(argv=None):
    """Download every video of the series named on the command line.

    Returns 0 when youtube-dl succeeded for every video and 1 when at least
    one download reported a non-zero exit status.
    """
    args = _parse_args(argv)
    # series_stub = 'building-react-applications-with-idiomatic-redux'
    series_stub = args.series_stub
    output_dir = series_stub

    manifest = _load_manifest(series_stub, output_dir)

    # sort manifest by ids ascending (sorted()'s default) so we grab the
    # videos in order -- assumes lesson ids increase in series order
    manifest = sorted(manifest, key=lambda vid: int(vid['id']))
    # get the total number of videos for the log output
    num_videos = len(manifest)

    failed_slugs = []
    for position, video in enumerate(manifest, start=1):
        # keep going after a failed download, but remember it for the summary
        if _download_video(position, num_videos, video, output_dir) != 0:
            failed_slugs.append(video['slug'])

    if failed_slugs:
        print('\n[!] youtube-dl failed for {} of {} videos: {}'.format(len(failed_slugs), num_videos, ', '.join(failed_slugs)))
        return 1

    print('\ndone. have a nice day!')
    return 0


if __name__ == '__main__':
    raise SystemExit(main())
	#!/usr/local/bin/python3
	# one-off scraper for egghead.io videos in a series
	# usage: chmod +x grabber.py && ./grabber.py <series stub>
	# requires youtube-dl (`brew install youtube-dl`)
import os
import subprocess
import json
import argparse
from urllib.request import urlretrieve

# TODO refactor to allow user to download series (playlist of videos),
# lesson (single video), or whatever else there is on the site


def _parse_args(argv=None):
    """Parse the command line.

    argv: list of argument strings, or None to let argparse read sys.argv[1:].
    Returns the argparse namespace; its `series_stub` attribute is the
    course/series stub given by the user.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('series_stub', help='course/series stub (https://egghead.io/courses/<series_stub>)', type=str)
    return parser.parse_args(argv)


def _load_manifest(series_stub, output_dir):
    """Return the parsed lesson manifest for a series, downloading it if needed.

    The manifest is cached as `<output_dir>/<series_stub>.manifest.json`; when
    that file already exists it is reused and nothing is fetched.
    `output_dir` is created if it is missing.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test
    os.makedirs(output_dir, exist_ok=True)

    manifest_url = 'https://egghead.io/api/v1/series/{}/lessons'.format(series_stub)
    local_manifest_filename = '{}/{}.manifest.json'.format(output_dir, series_stub)

    if not os.path.exists(local_manifest_filename):
        # download under a scratch name and rename into place, so an
        # interrupted download never leaves a truncated file that later runs
        # would mistake for a valid cached manifest
        partial_filename = local_manifest_filename + '.part'
        try:
            urlretrieve(manifest_url, partial_filename)
            os.replace(partial_filename, local_manifest_filename)
        finally:
            if os.path.exists(partial_filename):
                os.remove(partial_filename)

    # JSON is UTF-8 on the wire; don't depend on the locale's default encoding
    with open(local_manifest_filename, 'r', encoding='utf-8') as manifest_file:
        return json.load(manifest_file)


def _download_video(position, total, video, output_dir):
    """Download one lesson with youtube-dl and return youtube-dl's exit status.

    position: 1-based position of the lesson, used as the file name prefix
    total: number of lessons, used only for the progress line
    video: one manifest entry; its 'slug' and 'http_url' keys are read
    Raises subprocess.CalledProcessError if the file name lookup fails.
    """
    video_metadata = {
        'id': position,
        'slug': video['slug'],
        'url': video['http_url'],
    }
    # path template for youtube-dl
    path_template = '{output_dir}/{id:03d}.{slug}.%(ext)s'.format(output_dir=output_dir, **video_metadata)
    # figure out the full local file path
    local_video_path = subprocess.check_output(['youtube-dl', '--get-filename', '-o', path_template, video_metadata['url'], '--restrict-filenames'])
    # convert path from bytes to utf-8 string, strip trailing subprocess.check_output "\n"
    local_video_path = local_video_path.decode('utf-8').strip()

    print('\n[+] video ({id} of {total}): {slug}'.format(total=total, **video_metadata))

    # get the video with youtube-dl
    completed = subprocess.run(['youtube-dl', '--no-overwrites', '--continue', '-o', local_video_path, video_metadata['url']])
    return completed.returncode


def main(argv=None):
    """Download every video of the series named on the command line.

    Returns 0 when youtube-dl succeeded for every video and 1 when at least
    one download reported a non-zero exit status.
    """
    args = _parse_args(argv)
    # series_stub = 'building-react-applications-with-idiomatic-redux'
    series_stub = args.series_stub
    output_dir = series_stub

    manifest = _load_manifest(series_stub, output_dir)

    # sort manifest by ids ascending (sorted()'s default) so we grab the
    # videos in order -- assumes lesson ids increase in series order
    manifest = sorted(manifest, key=lambda vid: int(vid['id']))
    # get the total number of videos for the log output
    num_videos = len(manifest)

    failed_slugs = []
    for position, video in enumerate(manifest, start=1):
        # keep going after a failed download, but remember it for the summary
        if _download_video(position, num_videos, video, output_dir) != 0:
            failed_slugs.append(video['slug'])

    if failed_slugs:
        print('\n[!] youtube-dl failed for {} of {} videos: {}'.format(len(failed_slugs), num_videos, ', '.join(failed_slugs)))
        return 1

    print('\ndone. have a nice day!')
    return 0


if __name__ == '__main__':
    raise SystemExit(main())