Skip to content

Instantly share code, notes, and snippets.

@dearmark
Created January 17, 2014 05:55
Show Gist options
  • Save dearmark/8469006 to your computer and use it in GitHub Desktop.
Save dearmark/8469006 to your computer and use it in GitHub Desktop.
import requests
import sys
import re
import urllib
import os

# Session cookie sent with every request made by get_page().  The session ID
# can be supplied through the PHPSESSID environment variable; the value that
# was previously hard-coded remains the fallback so existing usage still works.
# NOTE(review): a session ID is a credential -- avoid committing a live one.
cookie = {
    'PHPSESSID': os.environ.get('PHPSESSID', 'phulssv3eg4bijf3cb423beve4'),
}
def get_lesson_urls(html):
    """Return the lesson URLs found in a course page.

    Every absolute ``http``/``https`` URL in *html* is extracted with a
    regular expression, and only those containing the substring ``'lesson'``
    are kept.  Each URL is returned once, in order of first appearance, so a
    lesson that is linked several times on the page is not listed repeatedly.

    Args:
        html: Text of the course page.

    Returns:
        A list of URL strings (empty when nothing matches).
    """
    # Raw string literals: the pattern contains ``\(`` and ``\)``, which a
    # non-raw literal only passes through as unknown escape sequences.
    # NOTE(review): the ``$-_`` range inside the character class also matches
    # characters such as ``'``, ``<`` and ``>``; the pattern is kept unchanged.
    urls = re.findall(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]'
        r'|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
        html)

    seen = set()
    lessons = []
    for url in urls:
        if 'lesson' in url and url not in seen:
            seen.add(url)
            lessons.append(url)
    return lessons
def get_page(url, timeout=30):
    """Fetch *url* with the session cookie and return the response body.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up; passed
            straight through to ``requests.get``.

    Returns:
        The response body as text.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx or
            5xx status code.
        requests.exceptions.Timeout: If the server does not answer within
            *timeout* seconds.
    """
    # Without an explicit timeout the request can wait on an unresponsive
    # server indefinitely.
    rv = requests.get(url, cookies=cookie, timeout=timeout)
    # Fail here rather than handing an error page to the regex-based parsers.
    rv.raise_for_status()
    return rv.text
def get_video_url(html):
    """Extract and percent-decode the video download URL from a lesson page.

    The page is searched for the first ``download_url':'<value>'`` fragment;
    ``<value>`` is percent-decoded and returned.

    Args:
        html: Text of the lesson page.

    Returns:
        The decoded download URL.

    Raises:
        ValueError: If *html* contains no ``download_url`` entry.
    """
    # ``unquote`` lives in ``urllib.parse`` on Python 3 and directly in
    # ``urllib`` on Python 2; importing it here lets the function run on both.
    try:
        from urllib.parse import unquote
    except ImportError:
        from urllib import unquote

    result = re.search(r"download_url':'(.+?)'", html)
    if result is None:
        # Previously this surfaced as an AttributeError on ``None``.
        raise ValueError('download_url not found in page')
    return unquote(result.group(1))
def main():
    """Print the video download URL of every lesson in a course.

    The course URL is read from the first command-line argument.  The course
    page is fetched and its lesson URLs are printed; each lesson page is then
    fetched in turn and its video URL is printed.

    Exits with status -1 when no course URL is given on the command line.
    """
    try:
        course_url = sys.argv[1]
    except IndexError:
        print('The first parameter must be course URL')
        # ``sys.exit`` rather than the ``exit`` helper, which is only
        # injected by the ``site`` module.
        sys.exit(-1)

    # Single-argument ``print(...)`` yields the same output whether it is
    # parsed as a Python 2 statement or as a Python 3 function call.
    print('Course URL: %s' % course_url)
    print('Getting all course URLs')
    urls = get_lesson_urls(get_page(course_url))
    print(urls)
    for url in urls:
        print(get_video_url(get_page(url)))
# Run the downloader only when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment