hckr/codeschool_download.py

## codeschool_download.py
#!/usr/bin/env python3

import requests
import re
import yaml
import urllib.request
import urllib.error
import time
import os
from lxml import html

# Sign-in endpoint: fetched once to read the form token, then posted to.
login_page_url = 'https://www.codeschool.com/users/sign_in'

# Single session object reused for the sign-in and for every later request.
s = requests.Session()

# The sign-in form contains an input named "authenticity_token"; its value
# is sent back together with the credentials.
login_form_page = html.fromstring(s.get(login_page_url).content)
token_inputs = login_form_page.xpath('//input[@name="authenticity_token"]')
token = token_inputs[0].value

# Login and password are empty strings in this file.
credentials = {
    'authenticity_token': token,
    'user[login]': '',
    'user[password]': '',
}
s.post(login_page_url, data=credentials)

# Destination of the download currently in progress (None when idle); read
# by the KeyboardInterrupt handler to remove a half-written file.
currently_downloading_path = None


def download_videos(codeschool_url, dest_prefix=''):
    """Download every video listed on a Code School videos page.

    The page is fetched through the module-level session ``s`` and searched
    for the argument of a ``VideoManager(...)`` call, which is parsed as
    YAML. Each entry of its ``media`` list is saved as an ``.mp4`` file
    under ``dest_prefix``; files that already exist are skipped.

    Args:
        codeschool_url: URL of the page listing a course's videos.
        dest_prefix: Directory to save into (created if missing). The
            empty string saves relative to the current working directory.

    Side effects:
        Sets the global ``currently_downloading_path`` to the file being
        written while a download is in progress, and sleeps 10 seconds
        after each completed download.

    Raises:
        SystemExit: With status 1 if the embedded data cannot be parsed.
    """
    global currently_downloading_path

    # str() of a bytes object yields its repr (b'...'), so the page text
    # carries Python escape sequences. Kept as-is because the replace()
    # calls below are written against that form.
    course_videos_page = str(s.get(codeschool_url).content)
    result = re.search(r'VideoManager\((.+?)\);', course_videos_page, re.DOTALL)
    if result is None:
        print('    No videos.', end='', flush=True)
        return

    # Undo the \' escaping that the bytes repr can introduce.
    course_videos_json = result.group(1).replace(r"\'", r"'")
    try:
        # safe_load: the data comes from a remote page, and yaml.load()
        # without an explicit Loader is rejected by PyYAML 6+.
        course_data = yaml.safe_load(course_videos_json)
    except Exception as e:
        print('JSON Error: ' + str(e))
        print(course_videos_json)
        # Raise SystemExit directly: the exit() builtin is installed by the
        # site module and is not guaranteed to be available.
        raise SystemExit(1)

    if dest_prefix != '':
        os.makedirs(dest_prefix, exist_ok=True)

    media = course_data['media']
    count = len(media)
    for number, video in enumerate(media, start=1):
        print('    ', end='', flush=True)
        # Spaces and slashes are replaced so the title is a single file name.
        title = video['title'].replace(' ', '_').replace('/', '_')
        if count > 1:
            dest = os.path.join(dest_prefix, '%i_%s.mp4' % (number, title))
            print('Downloading video %i/%i... ' % (number, count), end='', flush=True)
        else:
            dest = os.path.join(dest_prefix, '%s.mp4' % title)
        # Turn the metadata URL into a direct 720p .mp4 URL.
        url = video['media'].replace('/data.json?', '.mp4?profile=720p&').replace(r'\u0026', '&')
        if os.path.exists(dest):
            print('Already existsted.')
            continue

        currently_downloading_path = dest
        try:
            try:
                urllib.request.urlretrieve(url, dest)
            except urllib.error.HTTPError:
                print('Falling back to 480p', end='', flush=True)
                urllib.request.urlretrieve(url.replace('profile=720p', 'profile=480p'), dest)
        except Exception:
            # A failed transfer can leave a truncated file behind; remove it
            # so a later run does not skip it as already downloaded.
            # KeyboardInterrupt is not caught here on purpose: it propagates
            # with currently_downloading_path still set.
            if os.path.exists(dest):
                os.remove(dest)
            currently_downloading_path = None
            raise
        currently_downloading_path = None
        print('Done.')
        # Pause between downloads.
        time.sleep(10)


def download_course(name):
    """Download all videos of the course ``name`` into a directory named ``name``."""
    videos_url = ''.join(('https://www.codeschool.com/courses/', name, '/videos'))
    download_videos(videos_url, dest_prefix=name)


try:
    # Fetch the course index and collect the slug from every course link.
    index_markup = str(s.get('https://www.codeschool.com/courses/').content)
    courses = re.findall(r'<a href="/courses/([a-z-]+)">', index_markup)

    total = len(courses)
    for position, slug in enumerate(courses, start=1):
        print('Downloading course %i/%i: %s' % (position, total, slug))
        download_course(slug)
except KeyboardInterrupt:
    # Interrupted mid-download: delete the half-written file, if there is one.
    partial = currently_downloading_path
    if partial and os.path.exists(partial):
        os.remove(partial)
	#!/usr/bin/env python3

	import requests
	import re
	import yaml
	import urllib.request
	import urllib.error
	import time
	import os
	from lxml import html

	# Sign-in endpoint: fetched once to read the form token, then posted to.
	login_page_url = 'https://www.codeschool.com/users/sign_in'

	# Single session object reused for the sign-in and for every later request.
	s = requests.Session()

	# The sign-in form contains an input named "authenticity_token"; its value
	# is sent back together with the credentials.
	login_form_page = html.fromstring(s.get(login_page_url).content)
	token_inputs = login_form_page.xpath('//input[@name="authenticity_token"]')
	token = token_inputs[0].value

	# Login and password are empty strings in this file.
	credentials = {
		'authenticity_token': token,
		'user[login]': '',
		'user[password]': '',
	}
	s.post(login_page_url, data=credentials)

	# Destination of the download currently in progress (None when idle); read
	# by the KeyboardInterrupt handler to remove a half-written file.
	currently_downloading_path = None


	def download_videos(codeschool_url, dest_prefix=''):
		"""Download every video listed on a Code School videos page.

		The page is fetched through the module-level session ``s`` and searched
		for the argument of a ``VideoManager(...)`` call, which is parsed as
		YAML. Each entry of its ``media`` list is saved as an ``.mp4`` file
		under ``dest_prefix``; files that already exist are skipped.

		Args:
			codeschool_url: URL of the page listing a course's videos.
			dest_prefix: Directory to save into (created if missing). The
				empty string saves relative to the current working directory.

		Side effects:
			Sets the global ``currently_downloading_path`` to the file being
			written while a download is in progress, and sleeps 10 seconds
			after each completed download.

		Raises:
			SystemExit: With status 1 if the embedded data cannot be parsed.
		"""
		global currently_downloading_path

		# str() of a bytes object yields its repr (b'...'), so the page text
		# carries Python escape sequences. Kept as-is because the replace()
		# calls below are written against that form.
		course_videos_page = str(s.get(codeschool_url).content)
		result = re.search(r'VideoManager\((.+?)\);', course_videos_page, re.DOTALL)
		if result is None:
			print(' No videos.', end='', flush=True)
			return

		# Undo the \' escaping that the bytes repr can introduce.
		course_videos_json = result.group(1).replace(r"\'", r"'")
		try:
			# safe_load: the data comes from a remote page, and yaml.load()
			# without an explicit Loader is rejected by PyYAML 6+.
			course_data = yaml.safe_load(course_videos_json)
		except Exception as e:
			print('JSON Error: ' + str(e))
			print(course_videos_json)
			# Raise SystemExit directly: the exit() builtin is installed by the
			# site module and is not guaranteed to be available.
			raise SystemExit(1)

		if dest_prefix != '':
			os.makedirs(dest_prefix, exist_ok=True)

		media = course_data['media']
		count = len(media)
		for number, video in enumerate(media, start=1):
			print(' ', end='', flush=True)
			# Spaces and slashes are replaced so the title is a single file name.
			title = video['title'].replace(' ', '_').replace('/', '_')
			if count > 1:
				dest = os.path.join(dest_prefix, '%i_%s.mp4' % (number, title))
				print('Downloading video %i/%i... ' % (number, count), end='', flush=True)
			else:
				dest = os.path.join(dest_prefix, '%s.mp4' % title)
			# Turn the metadata URL into a direct 720p .mp4 URL.
			url = video['media'].replace('/data.json?', '.mp4?profile=720p&').replace(r'\u0026', '&')
			if os.path.exists(dest):
				print('Already existsted.')
				continue

			currently_downloading_path = dest
			try:
				try:
					urllib.request.urlretrieve(url, dest)
				except urllib.error.HTTPError:
					print('Falling back to 480p', end='', flush=True)
					urllib.request.urlretrieve(url.replace('profile=720p', 'profile=480p'), dest)
			except Exception:
				# A failed transfer can leave a truncated file behind; remove it
				# so a later run does not skip it as already downloaded.
				# KeyboardInterrupt is not caught here on purpose: it propagates
				# with currently_downloading_path still set.
				if os.path.exists(dest):
					os.remove(dest)
				currently_downloading_path = None
				raise
			currently_downloading_path = None
			print('Done.')
			# Pause between downloads.
			time.sleep(10)


	def download_course(name):
		"""Download all videos of the course ``name`` into a directory named ``name``."""
		download_videos('https://www.codeschool.com/courses/' + name + '/videos', name)


	try:
		# Fetch the course index and collect the slug from every course link.
		courses_page = str(s.get('https://www.codeschool.com/courses/').content)
		courses = re.findall(r'<a href="/courses/([a-z-]+)">', courses_page)

		courses_len = len(courses)
		for i, course in enumerate(courses):
			print('Downloading course %i/%i: %s' % (i + 1, courses_len, course))
			download_course(course)

	except KeyboardInterrupt:
		# Interrupted mid-download: delete the half-written file, if there is one.
		if currently_downloading_path and os.path.exists(currently_downloading_path):
			os.remove(currently_downloading_path)