Skip to content

Instantly share code, notes, and snippets.

@ahknight
Last active August 29, 2015 14:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ahknight/5fab02cd007bac2b21ad to your computer and use it in GitHub Desktop.
Save ahknight/5fab02cd007bac2b21ad to your computer and use it in GitHub Desktop.
Downloads WWDC media. Works for 2011, 2013, and 2014. 2012 seems to break on authentication, even with the ADCDownloadAuth cookie provided. Requires BeautifulSoup4 and Requests, plus the value of your ADCDownloadAuth cookie stored in a JSON file named "wwdc_cookies.json" in the same directory as the script.
#!/usr/bin/env python3
import argparse
import datetime
import os
import json
import requests
from bs4 import BeautifulSoup
def scrape_session_page(url, event_name=""):
    """Scrape an Apple developer videos page and download each session's media.

    Parameters:
        url        -- page listing sessions as <li class="session"> entries.
        event_name -- human-readable prefix for the output filenames.

    Relies on module-level globals set by the CLI section of this script:
        args       -- parsed options; .pdf/.hd/.sd select which media to fetch.
        cookie_jar -- cookie dict (ADCDownloadAuth) sent with download requests.

    Files are written to the current directory. A file whose on-disk size
    already matches the server's content-length is skipped.
    """
    r = requests.get(url)
    # Name the parser explicitly: bare BeautifulSoup(html) picks whichever
    # parser happens to be installed, which can vary between machines.
    soup = BeautifulSoup(r.text, "html.parser")

    for session in soup.find_all('li', class_='session'):
        try:
            session_id = session.get('id').replace('-video', '')
            title = session.find('li', class_='title')
            track = session.find('li', class_='track')
            platform = session.find('li', class_='platform')
            downloads = session.find('p', class_='download').find_all('a')
            title = title.string if title else ""
            track = track.string if track else ""
            platform = platform.string if platform else ""
            session_name = "%s - Session %s - %s (%s, %s)" % (
                event_name, session_id, title, track, platform)
        except Exception as e:
            print("ERROR: Could not parse session entry: %r" % e)
            continue

        for link in downloads:
            # Bug fix: bind filename before the try so the except handler
            # below never hits a NameError when the failure happens before
            # the full filename has been built (e.g. link.string is None).
            filename = session_name
            try:
                link_type = link.string.upper()
                if link_type == 'PDF' and not args.pdf:
                    continue
                if link_type == 'HD' and not args.hd:
                    continue
                if link_type == 'SD' and not args.sd:
                    continue

                # The devcenter URL bounces through auth; the adcdownload
                # host serves the file directly given the auth cookie.
                link_href = link.get('href').replace(
                    'https://developer.apple.com/devcenter/download.action?path=/',
                    'http://adcdownload.apple.com/')

                filename = "%s [%s]" % (session_name, link_type)
                filename += '.pdf' if link_type == 'PDF' else '.m4v'

                if not link_href.startswith('http'):
                    continue

                r = requests.get(link_href, cookies=cookie_jar, stream=True)
                # Bug fix: the header can be absent; default to 0 instead of
                # raising KeyError.
                size = int(r.headers.get('content-length', 0))

                # Skip files that are already fully downloaded (only when the
                # server actually reported a size).
                if os.path.exists(filename) and size > 0:
                    if os.stat(filename).st_size >= size:
                        print(" %s: Done.\r" % (filename,))
                        continue

                with open(filename, 'wb') as fd:
                    chunk_size = 4 * 1024 * 1024
                    progress = 0
                    for chunk in r.iter_content(chunk_size):
                        progress += len(chunk)
                        if size:  # avoid ZeroDivisionError on unknown length
                            print(" %s: %0.2f%%\r" % (filename, progress / float(size) * 100.0),
                                  end="", flush=True)
                        fd.write(chunk)
                print("")  # newline after the \r-terminated progress line
            except Exception as e:
                print("")
                print("Error downloading %s: %r" % (filename, e))
                continue
# ---- Command-line interface and driver -------------------------------------
this_year = datetime.datetime.now().year

parser = argparse.ArgumentParser(description='Download WWDC videos and presentations.')
parser.add_argument('-y', '--year', action='append', type=str,
                    help='The year to download. Can be specified multiple times. Defaults to the current year (%d).' % this_year)
parser.add_argument('--tt', action="store_true",
                    help='Download the media from the current Tech Talks page.')
parser.add_argument('--hd', action="store_true",
                    help='Download the HD version of the session videos.')
parser.add_argument('--sd', action="store_true",
                    help='Download the SD version of the session videos.')
parser.add_argument('--pdf', action="store_true",
                    help='Download the PDFs of the session presentations.')
args = parser.parse_args()

# Default to the current WWDC year unless the user asked for Tech Talks only.
# (Idiom fix: truthiness covers both None and [] — no `== None` comparison.)
if not args.year and not args.tt:
    args.year = [str(this_year)]

# No media type selected means download everything.
if not (args.pdf or args.sd or args.hd):
    args.pdf = args.sd = args.hd = True

# The ADCDownloadAuth cookie must be supplied by the user in this file;
# scrape_session_page sends it with every download request.
with open("wwdc_cookies.json", "r") as fp:
    cookie_jar = json.load(fp)

base_url = 'https://developer.apple.com/videos/wwdc/'
tt_url = 'https://developer.apple.com/tech-talks/videos/'

if args.tt:
    scrape_session_page(tt_url, event_name="iOS 7 Tech Talks")

for year in args.year or []:
    wwdc_url = "/".join([base_url, year, ""])
    scrape_session_page(wwdc_url, event_name="WWDC %s" % year)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment