Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Download lecture videos of ai-class, with basic resume support
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = "Deepak.G.R."
__credits__ = "Sumod Hajela"
__license__ = 'Public Domain'
"""
usage:
Go to command line and type
python ai-class.py "topic-name"
topic-names can be "Welcome to AI", "Problem Solving"
If the download is interrupted, delete the last partially downloaded file and restart. The script will skip all existing files and continue with the next new file.
PS: Python2.6.2 should be installed in your system.
Let me know if you have any problems.
"""
from urllib import *
from urlparse import *
from sgmllib import SGMLParser
import re
import pdb
import sys
import json
from os import *
import os.path
# Prefix that a YouTube video id is appended to in order to build a watch link.
url_youtube = 'http://www.youtube.com/watch?v='

# Name of the course unit to download, taken from the first command-line
# argument.  Example values:
#req_unit = 'problem solving'
#req_unit = 'welcome to AI'
req_unit = sys.argv[1]

# Maps a quiz id (digit string) to the YouTube id of its video; it starts
# empty and is filled in by init_quiz_hash().
quiz_hash = {}
class UrlLister(SGMLParser):
    """Collect YouTube links for the lecture videos of the requested unit.

    The parser keeps a ``flag``.  While ``flag`` is 0, every piece of text
    seen on the page is compared with the requested unit name; when one
    matches, ``flag`` becomes 1.  While ``flag`` is 1, every anchor whose
    href looks like a course video link is converted into a YouTube watch
    URL and appended to ``urls``.  An anchor pointing at a topic page sets
    ``flag`` back to 0.
    """

    def reset(self):
        """Reset the parser state (SGMLParser calls this on construction)."""
        SGMLParser.reset(self)
        self.urls = []    # YouTube watch URLs, in the order they were found
        self.flag = 0     # 1 while inside the requested unit, otherwise 0
        self.req_unit = req_unit
        self.names = []   # never filled by this class; kept for compatibility

    def start_a(self, attrs):
        """Handle an opening ``<a>`` tag.

        attrs -- list of (name, value) pairs for the tag's attributes.

        Anchors that lack the attribute needed at a given step (``href``,
        or ``id`` for video links) are ignored instead of raising
        IndexError.  A video link whose id cannot be resolved to a YouTube
        id is skipped with a warning on stderr.
        """
        hrefs = [value for name, value in attrs if name == 'href']
        if not hrefs:
            # e.g. <a name="..."> -- nothing to inspect.
            return
        href = str(hrefs[0])

        # A topic link starts a new unit, so stop collecting until the
        # unit name is matched again by handle_data().
        if re.search(r'/course/topic/(\d)+', href):
            self.flag = 0

        if self.flag != 1 or not re.search(r'/course/video/\w+/\d+$', href):
            return

        ids = [value for name, value in attrs if name == 'id']
        if not ids:
            return
        anchor_id = ids[0]

        if 'quiz' in anchor_id:
            # Quiz anchors only carry a quiz id; the YouTube id comes from
            # the module-level quiz_hash table.
            quiz_ids = re.findall(r'quiz_(\d+)', anchor_id)
            video_id = quiz_hash.get(quiz_ids[0]) if quiz_ids else None
        else:
            # Plain video anchors embed the YouTube id in the id attribute.
            video_ids = re.findall(r'video_\d+_(.+)', anchor_id)
            video_id = video_ids[0] if video_ids else None

        if not video_id:
            sys.stderr.write('WARNING: no video id found for %s\n' % href)
            return
        self.urls.append(url_youtube + video_id)

    def handle_data(self, text):
        """Switch ``flag`` to 1 when *text* matches the requested unit.

        Only letters are compared, case-insensitively, and the text counts
        as a match when it is contained in the requested unit name.  Text
        is ignored while ``flag`` is already 1.
        """
        if self.flag != 0:
            return
        letters = re.sub(r'[^A-Za-z]', '', text.strip())
        self.req_unit = re.sub(r'[^A-Za-z]', '', self.req_unit)
        # Both strings hold ASCII letters only, so a lower-cased substring
        # test is equivalent to the former case-insensitive regex search.
        if letters and letters.lower() in self.req_unit.lower():
            self.flag = 1
def init_quiz_hash():
print 'STATUS: Initializing quiz_id hash'
quiz_url = 'http://www.ai-class.com/course/json/filter/QuizQuestion'
quiz_url = urlopen(quiz_url)
data = json.load(quiz_url)
quiz_id = list()
for ind in xrange(len(data['data'])):
piece = str(data['data'][ind])
match = re.findall('\'youtube_id\': u\'(.+?)\',.*?\'quiz_question\': (\d+?),', piece)
if match:
quiz_id.append(match[0])
for v, i in quiz_id:
quiz_hash[i] = v
print 'STATUS: quiz_id Initialized.'
def download_video(urls):
dirname = 'lecture ' + str(req_unit)
py_path = path.abspath(sys.argv[0])
py_path = path.dirname(py_path)
if not os.path.exists(dirname):
mkdir(dirname)
chdir(dirname)
for video_url in urls:
video_id = parse_qs(urlparse(video_url).query)['v'][0]
get_vars = parse_qs(unquote(urlopen("http://www.youtube.com/get_video_info?video_id="+video_id).read()))
title = get_vars['title'][0] + '.flv'
if os.path.isfile(title):
continue;
i = 0
entries = (get_vars['fmt_list'][0]).split(',')
for entry in entries:
match = re.search(r'^45.*', entry)
if match:
break;
i = i + 1;
link = get_vars['itag'][i]
link = re.findall(r'45,url=(.*)', link)[0]
print '\n-->Downloading, Title: ', title
urlretrieve(link, title)
"""
for v in get_vars.keys():
print v, '\n', get_vars[v], '\n\n'
pdb.set_trace()
"""
chdir(py_path)
def main():
init_quiz_hash();
page = urlopen("http://www.ai-class.com/home/")
htmlSource = page.read()
parser = UrlLister()
print 'STATUS: Fetching video urls.'
parser.feed(htmlSource)
print 'STATUS: SUCCESS'
page.close()
parser.close()
i = 0
"""
for url in parser.urls:
print 'url: ', url, '\n'
i = i + 1
"""
print 'Number of videos: ', len(parser.urls);
print 'STATUS: Starting download.'
download_video(parser.urls)
print '\n\n*********Download Finished*********'
if __name__ == "__main__":
main()
@cerotidinon

This comment has been minimized.

Copy link

@cerotidinon cerotidinon commented Oct 26, 2011

When having problems with downloading classes 5 and 6, just change the two occurrences of 45 (lines 115 and 121) to 22. Then it should work for those lectures.

@danielmaxx

This comment has been minimized.

Copy link

@danielmaxx danielmaxx commented Oct 27, 2011

Hi,

I have a slow internet connection. How could I change this code to download only 240p versions of the videos?

BTW, how should we change the occurrences of 45 in the above code?

Thanks

@cerotidinon

This comment has been minimized.

Copy link

@cerotidinon cerotidinon commented Oct 27, 2011

Hi,

you can change it by opening the ai-class.py file with any editor (e.g. notepad under windows). For other resolutions try replacing the 45 by 35 (854x480), 34 (640x360) or 18 (320x240).

@danielmaxx

This comment has been minimized.

Copy link

@danielmaxx danielmaxx commented Oct 27, 2011

Hi,

Thanks for the previous answer, nevertheless, I have another question: how can I download the captions within the videos?

Thanks again :D

@cerotidinon

This comment has been minimized.

Copy link

@cerotidinon cerotidinon commented Oct 28, 2011

I didn't test it, but there is a Greasemonkey script for this: http://www.notesandreviews.com/education/downloading-ai-class-transcripts-from-youtube

@vickychijwani

This comment has been minimized.

Copy link

@vickychijwani vickychijwani commented Nov 12, 2011

How do I use this script if I'm behind an HTTP proxy?

@vickychijwani

This comment has been minimized.

Copy link

@vickychijwani vickychijwani commented Nov 12, 2011

Ok I figured how to use it with an HTTP proxy. See my fork of this gist (https://gist.github.com/1360366) for details.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment