AshwinRamesh/destroy_all_software_downloader.py

## destroy_all_software_downloader.py
#!/usr/bin/python
# encoding: utf-8

# Based off the script from https://gist.github.com/Paaskehare/3949299
# @author: Ash Ramesh (27/10/2016)

import cookielib
import urllib
import urllib2
import re
import os.path

# Credentials used by login(); replace both placeholders before running.
email = 'ADD EMAIL HERE'
password = 'ADD PASSWORD HERE'

# Root url of the site; every other url in this script is built by appending
# a path to it, so it must keep its trailing slash.
BASE_URL = 'https://www.destroyallsoftware.com/'

# Install a global opener backed by a cookie jar, so that cookies received by
# one urllib2.urlopen() call are sent along with the following ones.
opener = urllib2.build_opener(
    urllib2.HTTPCookieProcessor(cookielib.CookieJar())
)
urllib2.install_opener(opener)

def login():
    """Sign in to the site with the module-level ``email`` and ``password``.

    Fetches the sign-in page, reads the hidden ``authenticity_token`` form
    field from it and posts that token back together with the credentials.
    Both requests are made with ``urllib2.urlopen``, i.e. through the
    globally installed opener.

    Returns:
        The body of the response to the sign-in POST.

    Raises:
        RuntimeError: if the sign-in page contains no authenticity token
            in the expected form (for example because the markup changed).
    """
    url = BASE_URL + 'screencasts/users/sign_in'
    page = urllib2.urlopen(url).read()

    # Raw string so the regex escapes reach the regex engine untouched.
    token_match = re.search(
        r'<input type="hidden" name="authenticity_token" value="([\w/\+=]+?)" />',
        page)
    if token_match is None:
        # Without this check the failure would surface as an opaque
        # "'NoneType' object has no attribute 'group'".
        raise RuntimeError('Could not find the authenticity token on ' + url)

    values = {
        'utf8':               '✓',
        'authenticity_token': token_match.group(1),
        'user[email]':        email,
        'user[password]':     password,
        'commit':             'Sign in',
    }

    # Supplying a data payload makes urllib2 send the request as a POST.
    req = urllib2.Request(url, urllib.urlencode(values))
    return urllib2.urlopen(req).read()

def get_catalog_urls():
    """Return the relative url of every screencast linked from the catalog.

    Each entry looks like '/screencasts/catalog/clarity-via-isolated-tests'.
    The list is in the reverse of the order in which the links appear on
    the catalog page.
    """
    catalog_page = urllib2.urlopen(BASE_URL + 'screencasts/catalog').read()
    episode_links = re.findall('<div class="episode">\s+<a href="(.*?)">', catalog_page)
    # Flip the page order in place before handing the list back.
    episode_links.reverse()
    return episode_links

def download_all_screencasts(relative_url_list):
    """Download every screencast in *relative_url_list* to the current directory.

    Each relative url (e.g. '/screencasts/catalog/clarity-via-isolated-tests')
    is saved as '<last path segment>.mov'. A screencast is skipped when its
    '.mov' file already exists, or when no video source url can be found on
    its page.

    The video is streamed into '<name>.mov.part' and renamed to '<name>.mov'
    only after the whole body has been written, so an interrupted run cannot
    leave a truncated '.mov' that a later run would treat as already
    downloaded.

    Args:
        relative_url_list: iterable of site-relative urls, each starting
            with '/'.
    """
    chunk_size = 64 * 1024  # bytes read from the response per iteration

    for screencast_url in relative_url_list:

        # Build filename - e.g. clarity-via-isolated-tests
        filename = screencast_url.split('/')[-1]
        full_filename = filename + '.mov'

        if os.path.exists(full_filename):
            print('Already downloaded: ' + filename + ' skipping ...')
            continue

        print('Attempting to retrieve and download ' + filename)

        # Go to the screencast page and get the source url for the video
        url = BASE_URL + screencast_url[1:]  # Remove / from the relative url
        page = urllib2.urlopen(url).read()
        download_urls = re.findall('source.src = "(.*?)"', page)
        if not download_urls:
            # Previously this case crashed with IndexError and aborted the
            # remaining downloads.
            print('No video source found for "' + filename + '" skipping ...')
            continue

        # Download the video & save to file
        print('Downloading "' + filename + '" ...')
        partial_filename = full_filename + '.part'
        response = urllib2.urlopen(urllib2.Request(download_urls[0]))
        try:
            # 'wb' discards any stale partial file left by an earlier run.
            with open(partial_filename, 'wb') as f:
                while True:
                    data = response.read(chunk_size)
                    if not data:
                        break
                    f.write(data)
        finally:
            response.close()

        # Only a completed download gets the final name.
        os.rename(partial_filename, full_filename)

def main():
    """Sign in, fetch the catalog and download every screencast in it."""
    # The response body returned by login() is not needed here, so it is
    # discarded instead of being bound to an unused local.
    login()
    catalog_urls = get_catalog_urls()
    download_all_screencasts(catalog_urls)


# Run only when executed as a script, so importing this module does not
# trigger a login and a full download.
if __name__ == '__main__':
    main()
	#!/usr/bin/python
	# encoding: utf-8

	# Based off the script from https://gist.github.com/Paaskehare/3949299
	# @author: Ash Ramesh (27/10/2016)

	import cookielib
	import urllib
	import urllib2
	import re
	import os.path

	# Credentials used by login(); replace both placeholders before running.
	email = 'ADD EMAIL HERE'
	password = 'ADD PASSWORD HERE'

	# Root url of the site; every other url in this script is built by appending
	# a path to it, so it must keep its trailing slash.
	BASE_URL = 'https://www.destroyallsoftware.com/'

	# Install a global opener backed by a cookie jar, so that cookies received by
	# one urllib2.urlopen() call are sent along with the following ones.
	opener = urllib2.build_opener(
	    urllib2.HTTPCookieProcessor(cookielib.CookieJar())
	)
	urllib2.install_opener(opener)

	def login():
	    """Sign in to the site with the module-level ``email`` and ``password``.

	    Fetches the sign-in page, reads the hidden ``authenticity_token`` form
	    field from it and posts that token back together with the credentials.
	    Both requests are made with ``urllib2.urlopen``, i.e. through the
	    globally installed opener.

	    Returns:
	        The body of the response to the sign-in POST.

	    Raises:
	        RuntimeError: if the sign-in page contains no authenticity token
	            in the expected form (for example because the markup changed).
	    """
	    url = BASE_URL + 'screencasts/users/sign_in'
	    page = urllib2.urlopen(url).read()

	    # Raw string so the regex escapes reach the regex engine untouched.
	    token_match = re.search(
	        r'<input type="hidden" name="authenticity_token" value="([\w/\+=]+?)" />',
	        page)
	    if token_match is None:
	        # Without this check the failure would surface as an opaque
	        # "'NoneType' object has no attribute 'group'".
	        raise RuntimeError('Could not find the authenticity token on ' + url)

	    values = {
	        'utf8':               '✓',
	        'authenticity_token': token_match.group(1),
	        'user[email]':        email,
	        'user[password]':     password,
	        'commit':             'Sign in',
	    }

	    # Supplying a data payload makes urllib2 send the request as a POST.
	    req = urllib2.Request(url, urllib.urlencode(values))
	    return urllib2.urlopen(req).read()

	def get_catalog_urls():
	    """Get a list of relative urls to each video in the catalog.

	    Urls look like '/screencasts/catalog/clarity-via-isolated-tests'.
	    The list is in the reverse of the order in which the links appear on
	    the catalog page.
	    """
	    url = BASE_URL + 'screencasts/catalog'
	    page = urllib2.urlopen(url).read()
	    # Raw string so the \s escape reaches the regex engine untouched.
	    screencasts = re.findall(r'<div class="episode">\s+<a href="(.*?)">', page)[::-1]
	    return screencasts

	def download_all_screencasts(relative_url_list):
	    """Download every screencast in *relative_url_list* to the current directory.

	    Each relative url (e.g. '/screencasts/catalog/clarity-via-isolated-tests')
	    is saved as '<last path segment>.mov'. A screencast is skipped when its
	    '.mov' file already exists, or when no video source url can be found on
	    its page.

	    The video is streamed into '<name>.mov.part' and renamed to '<name>.mov'
	    only after the whole body has been written, so an interrupted run cannot
	    leave a truncated '.mov' that a later run would treat as already
	    downloaded.

	    Args:
	        relative_url_list: iterable of site-relative urls, each starting
	            with '/'.
	    """
	    chunk_size = 64 * 1024  # bytes read from the response per iteration

	    for screencast_url in relative_url_list:

	        # Build filename - e.g. clarity-via-isolated-tests
	        filename = screencast_url.split('/')[-1]
	        full_filename = filename + '.mov'

	        if os.path.exists(full_filename):
	            print('Already downloaded: ' + filename + ' skipping ...')
	            continue

	        print('Attempting to retrieve and download ' + filename)

	        # Go to the screencast page and get the source url for the video
	        url = BASE_URL + screencast_url[1:]  # Remove / from the relative url
	        page = urllib2.urlopen(url).read()
	        download_urls = re.findall('source.src = "(.*?)"', page)
	        if not download_urls:
	            # Previously this case crashed with IndexError and aborted the
	            # remaining downloads.
	            print('No video source found for "' + filename + '" skipping ...')
	            continue

	        # Download the video & save to file
	        print('Downloading "' + filename + '" ...')
	        partial_filename = full_filename + '.part'
	        response = urllib2.urlopen(urllib2.Request(download_urls[0]))
	        try:
	            # 'wb' discards any stale partial file left by an earlier run.
	            with open(partial_filename, 'wb') as f:
	                while True:
	                    data = response.read(chunk_size)
	                    if not data:
	                        break
	                    f.write(data)
	        finally:
	            response.close()

	        # Only a completed download gets the final name.
	        os.rename(partial_filename, full_filename)

	def main():
	    """Sign in, fetch the catalog and download every screencast in it."""
	    # The response body returned by login() is not needed here, so it is
	    # discarded instead of being bound to an unused local.
	    login()
	    catalog_urls = get_catalog_urls()
	    download_all_screencasts(catalog_urls)


	# Run only when executed as a script, so importing this module does not
	# trigger a login and a full download.
	if __name__ == '__main__':
	    main()