Skip to content

Instantly share code, notes, and snippets.

@emctoo
Created June 2, 2012 03:27
Show Gist options
  • Save emctoo/2856390 to your computer and use it in GitHub Desktop.
Save emctoo/2856390 to your computer and use it in GitHub Desktop.
Get the video url
# -*- coding: utf-8 -*-
import urllib, urllib2, sys
from BeautifulSoup import BeautifulSoup # for html
def fetch_url(link):
    '''Download the page at ``link`` and return it parsed by BeautifulSoup.

    link -- URL string passed straight to ``urllib.urlopen``.

    Returns the ``BeautifulSoup`` object built from the response body.
    The response handle is closed whether or not reading succeeds.
    '''
    response = urllib.urlopen(link)
    try:
        contents = response.read()
    finally:
        # Release the connection even if read() fails part-way through.
        response.close()
    return BeautifulSoup(contents)
def listLinks(soup):
    '''Collect the ``href`` of every ``<a rel="lecture-link">`` in ``soup``.

    soup -- parsed document; only its ``findAll`` method is used.

    Returns a list holding ``anchor.get('href')`` for each matching anchor,
    in the order ``findAll`` yields them (empty list when nothing matches).
    '''
    return [anchor.get('href')
            for anchor in soup.findAll('a', rel='lecture-link')]
def findMP4link(link):
    '''Open the lecture page at ``link`` and return its MP4 source URL.

    link -- URL string of a lecture page, opened through a ``urllib2`` opener.

    Returns ``get('src')`` of the page's single
    ``<source type="video/mp4">`` element.

    Raises ValueError when the page does not contain exactly one such
    element. An explicit raise is used instead of ``assert`` so the check
    still runs under ``python -O``.
    '''
    opener = urllib2.build_opener()
    try:
        response = opener.open(link)
        try:
            contents = response.read()
        finally:
            # Close the response even if read() fails.
            response.close()
    finally:
        opener.close()

    sources = BeautifulSoup(contents).findAll('source', type='video/mp4')
    if len(sources) != 1:
        raise ValueError(
            "expected exactly one video/mp4 source at %s, found %d"
            % (link, len(sources)))
    return sources[0].get('src')
if __name__ == "__main__":
    # Lecture index page to scrape. Previews of other courses were used with
    # the same URL shape, e.g.:
    #   https://class.coursera.org/pgm/lecture/preview
    #   https://class.coursera.org/ml/lecture/preview
    index_url = "https://class.coursera.org/nlp/lecture/preview"
    lecture_links = listLinks(fetch_url(index_url))

    # Write one MP4 URL per line to the file 'nlp'. The ``with`` block closes
    # the file even if fetching one of the lecture pages raises.
    with open('nlp', 'w') as out:
        for lecture_link in lecture_links:
            out.write(findMP4link(lecture_link))
            out.write("\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment