
@st3fan
Last active August 29, 2015 14:02
Little hack to grab all the WWDC videos (in HD) and PDFs
#!/usr/bin/env python
# virtualenv env
# source env/bin/activate
# pip install beautifulsoup4 requests
# ./grab-videos.py

import logging
import multiprocessing
import subprocess
import urlparse

import bs4
import requests


def download(url):
    # Name the local file after the last path component of the URL and
    # let wget quietly resume any partially downloaded file.
    u = urlparse.urlparse(url)
    output = u.path.split('/')[-1]
    logging.info("Downloading %s" % output)
    subprocess.call(["wget", "--continue", "-q", "-O", output, url])


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s', datefmt='%y-%m-%d %H:%M:%S')

    # Fetch the WWDC 2014 videos page and collect links to the HD .mov
    # files and the session PDFs.
    r = requests.get("https://developer.apple.com/videos/wwdc/2014/")
    r.raise_for_status()
    soup = bs4.BeautifulSoup(r.text)

    urls = []
    for a in soup.find_all('a', href=True):  # skip anchors without an href
        url = urlparse.urlparse(a['href'])
        if url.path.endswith('.mov'):
            components = url.path.split('/')
            if '_hd_' in components[-1]:
                urls.append(a['href'])
        elif url.path.endswith('.pdf'):
            urls.append(a['href'])

    # Download up to three files at a time.
    if len(urls):
        pool = multiprocessing.Pool(processes=3)
        for url in urls:
            pool.apply_async(download, [url])
        pool.close()
        pool.join()
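
For reference, a minimal Python 3 sketch of the same logic: urlparse moves to urllib.parse and BeautifulSoup takes an explicit parser. It assumes the page still marks HD videos with "_hd_" in the .mov filename and links PDFs the same way, which may no longer match the current developer.apple.com layout.

    #!/usr/bin/env python3
    # Hypothetical Python 3 port of the script above; untested against
    # the current WWDC videos page.

    import logging
    import multiprocessing
    import subprocess
    from urllib.parse import urlparse

    import bs4
    import requests


    def download(url):
        # Derive the output filename from the last path component and let
        # wget resume partial downloads quietly.
        output = urlparse(url).path.split('/')[-1]
        logging.info("Downloading %s", output)
        subprocess.call(["wget", "--continue", "-q", "-O", output, url])


    if __name__ == "__main__":
        logging.basicConfig(level=logging.INFO,
                            format='%(asctime)s %(levelname)s %(message)s',
                            datefmt='%y-%m-%d %H:%M:%S')

        r = requests.get("https://developer.apple.com/videos/wwdc/2014/")
        r.raise_for_status()
        soup = bs4.BeautifulSoup(r.text, "html.parser")

        urls = []
        for a in soup.find_all('a', href=True):
            path = urlparse(a['href']).path
            name = path.split('/')[-1]
            if (path.endswith('.mov') and '_hd_' in name) or path.endswith('.pdf'):
                urls.append(a['href'])

        if urls:
            pool = multiprocessing.Pool(processes=3)
            for url in urls:
                pool.apply_async(download, [url])
            pool.close()
            pool.join()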