Last active
December 11, 2015 14:48
-
-
Save arunisrael/4616677 to your computer and use it in GitHub Desktop.
Khan Academy Video To Exercise Mapper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Python script that generates a Markdown-formatted list of Khan Academy videos that need exercises.
To run it yourself:
1. Download khan_academy_video_library.html from Dropbox, or follow the instructions in khan_academy_video_library.txt to generate the file yourself.
2. Make sure you have Python 2.7 and BeautifulSoup4 installed (the script imports urllib2, which does not exist in Python 3).
3. Run "python KAVideoToExercise.py".
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
import re | |
import urllib2 | |
class KAVideoToExercise:
    """Report which videos in a saved library page have no practice link.

    The constructor receives a dict whose entries become instance
    attributes.  The methods below read these attributes, so the dict is
    expected to provide them:

    * ``input_file``        -- path of the saved HTML file to parse
    * ``video_filter``      -- regex pattern matched against link ``href``s
    * ``video_class``       -- CSS class of the video link elements
    * ``video_title_class`` -- regex pattern for the class of the title span
    * ``practice_class``    -- CSS class of the practice button on a video page

    Call ``perform_mapping()`` to run the whole pipeline and print the
    result as Markdown links.
    """

    BASE_URL = 'http://www.khanacademy.org'
    PRACTICE_BUTTON_TAG = 'a'
    VIDEO_TITLE_TAG = 'span'

    def __init__(self, initial_data):
        """Copy every entry of ``initial_data`` onto the instance.

        The working attributes are reset afterwards, so a key in
        ``initial_data`` that collides with one of them is overwritten.
        """
        for key in initial_data:
            setattr(self, key, initial_data[key])
        self.file_soup = ""
        self.video_links = []
        self.video_dictionary = {}
        self.videos_wo_exercises = {}
        self.video_list = ""

    def perform_mapping(self):
        """Run all steps in order: parse, collect, fetch/check, print."""
        self.read_input_file()
        self.find_video_links()
        self.build_video_dictionary()
        self.check_videos_for_exercises()
        self.output_markdown()

    def read_input_file(self):
        """Parse ``self.input_file`` into ``self.file_soup``."""
        # The context manager closes the handle even if reading fails.
        with open(self.input_file) as handle:
            self.file_soup = BeautifulSoup(handle.read())

    def find_video_links(self):
        """Collect elements whose href matches the filter and class."""
        self.video_links = self.file_soup.find_all(
            href=re.compile(self.video_filter), class_=self.video_class)

    def build_video_dictionary(self):
        """Map each video title to its href in ``self.video_dictionary``.

        Links without a matching title span are skipped instead of
        aborting the run with an AttributeError.  When two links share a
        title, the later one wins.
        """
        # Compile once; the pattern is the same for every link.
        title_pattern = re.compile(self.video_title_class)
        for link in self.video_links:
            title_node = link.find(self.VIDEO_TITLE_TAG, class_=title_pattern)
            if title_node is None:
                continue
            title = title_node.contents[0].string
            self.video_dictionary[title] = link['href']

    def check_videos_for_exercises(self):
        """Fetch every video page and record those lacking a practice link.

        A page that answers with an HTTP error is skipped: nothing can be
        concluded about it.  Previously such a page either raised
        NameError or was judged using the preceding page's HTML.
        """
        for title in self.video_dictionary:
            full_url = self.BASE_URL + self.video_dictionary[title]
            try:
                response = urllib2.urlopen(full_url)
            except urllib2.HTTPError:
                continue
            try:
                raw = response.read()
            finally:
                response.close()
            soup = BeautifulSoup(raw)
            has_practice = soup.find(
                self.PRACTICE_BUTTON_TAG, class_=self.practice_class)
            if has_practice is None:
                self.videos_wo_exercises[title] = full_url

    def output_markdown(self):
        """Print one ``[title](url)`` line per recorded video, sorted by title."""
        for title in sorted(self.videos_wo_exercises):
            # Plain concatenation is kept on purpose: titles may be unicode
            # objects, and the line is encoded exactly once, when printed.
            line = "[" + title + "]" + "(" + self.videos_wo_exercises[title] + ")"
            print(line.encode('utf-8'))
# Settings for the run: which saved page to parse and which markup to look for.
INPUT_FILE = "khan_academy_video_library.html"
VIDEO_FILTER = "/math"
VIDEO_CLASS = 'vid-progress'
VIDEO_TITLE_CLASS = "progress-title"
PRACTICE_CLASS = "practice"

# Each keyword below becomes an attribute of the mapper instance.
mapper = KAVideoToExercise(dict(
    input_file=INPUT_FILE,
    video_filter=VIDEO_FILTER,
    video_class=VIDEO_CLASS,
    video_title_class=VIDEO_TITLE_CLASS,
    practice_class=PRACTICE_CLASS,
))

# Runs at import time as well as when executed as a script.
mapper.perform_mapping()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The dropbox link below contains the generated HTML source for https://www.khanacademy.org/library | |
# To get the HTML source yourself | |
# - Open https://www.khanacademy.org/library in Google Chrome | |
# - Right click anywhere on the page and select 'Inspect Element' | |
# - Right click on the <html> node at the top of the Elements Browser and select 'Copy as HTML'
# - Paste into your favorite text editor and save | |
https://dl.dropbox.com/u/46010896/khan_academy_video_library.html |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment