Skip to content

Instantly share code, notes, and snippets.

@hckr
Last active November 20, 2017 01:19
Show Gist options
  • Save hckr/2458d2144ae97fcf081957ce9c7b4e86 to your computer and use it in GitHub Desktop.
Save hckr/2458d2144ae97fcf081957ce9c7b4e86 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import requests
import re
import yaml
import urllib.request
import urllib.error
import time
import os
from lxml import html
# Sign in once at import time; every later request goes through the same
# requests.Session object `s`.
login_page_url = 'https://www.codeschool.com/users/sign_in'
s = requests.Session()

# The sign-in form carries a hidden "authenticity_token" input; its value is
# posted back together with the credentials.
_sign_in_document = html.fromstring(s.get(login_page_url).content)
_token_inputs = _sign_in_document.xpath('//input[@name="authenticity_token"]')
token = _token_inputs[0].value

# Credentials are blank in the source; supply your own login and password.
_sign_in_form = {
    'authenticity_token': token,
    'user[login]': '',
    'user[password]': '',
}
s.post(login_page_url, data=_sign_in_form)

# Destination path of the download currently in progress (None when idle).
# download_videos() sets it; the KeyboardInterrupt handler at the bottom of
# the script reads it to delete a partially written file.
currently_downloading_path = None
def download_videos(codeschool_url, dest_prefix=''):
    """Download every video listed on a Code School "videos" page.

    The page is fetched through the module-level session ``s`` and searched
    for the argument of a ``VideoManager(...)`` call, which is parsed as
    data. Each entry of its ``'media'`` list is saved as an ``.mp4`` file.

    Args:
        codeschool_url: URL of the page that embeds the ``VideoManager`` call.
        dest_prefix: Directory to save into; created if missing. An empty
            string saves into the current working directory.

    Returns:
        None. Progress is reported on stdout.

    Raises:
        SystemExit: With status 1 when the embedded data cannot be parsed.
        urllib.error.HTTPError: When the 480p fallback download fails too.

    Side effects:
        While a file is being written, its path is stored in the global
        ``currently_downloading_path`` so that the script's
        ``KeyboardInterrupt`` handler can remove the partial file.
    """
    global currently_downloading_path

    # str() of a bytes object yields its repr ("b'...'"), not decoded text.
    # Presumably that is why the \' and \u0026 escapes are undone by hand
    # further down -- NOTE(review): confirm before switching to `.text`.
    course_videos_page = str(s.get(codeschool_url).content)
    result = re.search(r'VideoManager\((.+?)\);', course_videos_page, re.DOTALL)
    if result is None:
        print(' No videos.', end='', flush=True)
        return

    course_videos_json = result.group(1).replace(r"\'", r"'")
    try:
        # safe_load instead of yaml.load: the payload comes from a remote
        # page, and yaml.load() without an explicit Loader raises TypeError
        # on PyYAML >= 6.
        course_data = yaml.safe_load(course_videos_json)
    except Exception as e:
        print('JSON Error: ' + str(e))
        print(course_videos_json)
        # SystemExit directly: the exit() builtin is only injected by `site`.
        raise SystemExit(1)

    if dest_prefix:
        os.makedirs(dest_prefix, exist_ok=True)

    media = course_data['media']
    count = len(media)
    for i, video in enumerate(media):
        print(' ', end='', flush=True)
        # Spaces and slashes in the title are replaced so it is usable as a
        # single file name.
        title = video['title'].replace(' ', '_').replace('/', '_')
        if count > 1:
            dest = os.path.join(dest_prefix, ('%i_%s.mp4' % (i + 1, title)))
            print('Downloading video %i/%i... ' % (i + 1, count), end='', flush=True)
        else:
            dest = os.path.join(dest_prefix, ('%s.mp4' % title))

        # Turn the '/data.json?' metadata URL into the '.mp4' URL asking for
        # the 720p profile, and restore the escaped ampersands.
        url = video['media'].replace('/data.json?', '.mp4?profile=720p&').replace(r'\u0026', '&')

        if os.path.exists(dest):
            print('Already existsted.')
            continue

        currently_downloading_path = dest
        try:
            urllib.request.urlretrieve(url, dest)
        except urllib.error.HTTPError:
            print('Falling back to 480p', end='', flush=True)
            urllib.request.urlretrieve(url.replace('profile=720p', 'profile=480p'), dest)
        currently_downloading_path = None
        print('Done.')
        # Pause between downloads; files that were skipped do not wait.
        time.sleep(10)
def download_course(name):
    """Download all videos of the course identified by *name*.

    *name* is the course's path segment under ``/courses/``; it is also used
    as the destination directory passed to ``download_videos``.
    """
    videos_page_url = ''.join(['https://www.codeschool.com/courses/', name, '/videos'])
    download_videos(videos_page_url, name)
# Script driver: collect the course slugs linked from the course index page
# and download each course in turn.
try:
    courses_page = str(s.get('https://www.codeschool.com/courses/').content)
    # Keep each slug once, in first-seen order: findall() returns a slug for
    # every matching link, so repeated links would inflate the "i/n" counter
    # and trigger redundant passes over the same course.
    courses = []
    _seen_courses = set()
    for _slug in re.findall(r'<a href="/courses/([a-z-]+)">', courses_page):
        if _slug not in _seen_courses:
            _seen_courses.add(_slug)
            courses.append(_slug)
    courses_len = len(courses)
    for i, course in enumerate(courses):
        print('Downloading course %i/%i: %s' % (i + 1, courses_len, course))
        download_course(course)
except KeyboardInterrupt:
    # On Ctrl+C, delete the file that was being written (if any) so that a
    # truncated video is not mistaken for a finished one on the next run.
    if currently_downloading_path and os.path.exists(currently_downloading_path):
        os.remove(currently_downloading_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment