Skip to content

Instantly share code, notes, and snippets.

@jayrambhia
Created March 11, 2012 07:56
Show Gist options
  • Save jayrambhia/2015497 to your computer and use it in GitHub Desktop.
Save jayrambhia/2015497 to your computer and use it in GitHub Desktop.
A Python script to download all the TED videos
import urllib2
import urllib
#import re
from BeautifulSoup import BeautifulSoup
"""
Download the spreadsheet below as an HTML file, then set `url` to its location:
https://docs.google.com/spreadsheet/ccc?key=0AsKzpC8gYBmTcGpHbFlILThBSzhmZkRhNm8yYllsWGc&hl=en#gid=0
"""
# Location of the HTML export of the talk index. This is a placeholder and
# must be replaced before running; a local copy can be addressed with a
# file:// URL such as the one in the trailing comment.
url = "The URL of above file" # url = "file:///home/jay/Downloads/index.html"

# Placeholder proxy settings (user, pass, proxy and port must be filled in).
# The "https" entry previously carried a duplicated scheme
# ("https://http://..."), which is not a valid proxy URL.
proxy = {"http": "http://user:pass@proxy:port/", "https": "https://user:pass@proxy:port/"}

# Number of bytes copied per read while saving a video, so that a whole
# video never has to be held in memory at once.
CHUNK_SIZE = 64 * 1024


def _read_soup(opener, page_url):
    """Fetch page_url through opener and return it parsed by BeautifulSoup."""
    response = opener.open(page_url)
    try:
        return BeautifulSoup(response.read())
    finally:
        response.close()


def _collect_talk_links(opener, index_url):
    """Return the talk page URLs listed on the index page.

    Only anchors carrying "target", "href" and "style" attributes are
    considered. The URL is taken from the anchor's first child (its text),
    not from its href attribute; anchors without any content are ignored.
    """
    soup = _read_soup(opener, index_url)
    anchors = soup.findAll("a", {"target": True, "href": True, "style": True})
    return [anchor.contents[0] for anchor in anchors if anchor.contents]


def _find_video_url(opener, talk_url):
    """Return the og:video URL advertised by a talk page, or None if absent."""
    soup = _read_soup(opener, talk_url)
    meta = soup.find("meta", {"property": "og:video", "content": True})
    if meta is None:
        return None
    return meta["content"]


def _download(opener, vid_link, filename):
    """Stream vid_link into filename in CHUNK_SIZE pieces."""
    response = opener.open(vid_link)
    try:
        with open(filename, "wb") as out:
            # read() returns an empty byte string once the body is exhausted.
            for chunk in iter(lambda: response.read(CHUNK_SIZE), b""):
                out.write(chunk)
    finally:
        response.close()


def main():
    """Download the video of every talk listed on the index page at url.

    A proxy-aware opener is built from proxy, installed as the urllib2
    default, and used for every request, including the index page itself
    (urllib.urlopen would bypass it). Each video is saved in the current
    directory under the last path segment of its URL. A talk that cannot be
    fetched, or that has no usable video URL, is reported and skipped so
    the remaining talks are still processed.
    """
    opener = urllib2.build_opener(urllib2.ProxyHandler(proxy))
    urllib2.install_opener(opener)

    for talk_url in _collect_talk_links(opener, url):
        try:
            vid_link = _find_video_url(opener, talk_url)
            if vid_link is None:
                print("No og:video metadata found on " + talk_url)
                continue
            print(vid_link)
            filename = vid_link.split("/")[-1]
            if not filename:
                # A URL ending in "/" yields no usable file name.
                print("Cannot derive a file name from " + vid_link)
                continue
            print("Downloading " + filename)
            _download(opener, vid_link, filename)
        except urllib2.URLError as err:
            # HTTPError is a subclass of URLError, so HTTP failures land here too.
            print("Skipping %s: %s" % (talk_url, err))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment