Skip to content

Instantly share code, notes, and snippets.

@avyfain
Created September 26, 2020 05:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save avyfain/cb6ff07daad8f1bb19ee835e80169594 to your computer and use it in GitHub Desktop.
Save avyfain/cb6ff07daad8f1bb19ee835e80169594 to your computer and use it in GitHub Desktop.
mitocw.py
import re
import sys
from collections import defaultdict
from pprint import pprint

import requests
from bs4 import BeautifulSoup
# MIT OpenCourseWare department listing pages to scrape, visited in this
# order. The final path segment of each URL is used as the department key.
departments = [
    "https://ocw.mit.edu/courses/architecture",
    "https://ocw.mit.edu/courses/media-arts-and-sciences",
    "https://ocw.mit.edu/courses/urban-studies-and-planning",
    "https://ocw.mit.edu/courses/aeronautics-and-astronautics",
    "https://ocw.mit.edu/courses/biological-engineering",
    "https://ocw.mit.edu/courses/chemical-engineering",
    "https://ocw.mit.edu/courses/civil-and-environmental-engineering",
    "https://ocw.mit.edu/courses/electrical-engineering-and-computer-science",
    "https://ocw.mit.edu/courses/engineering-systems-division",
    "https://ocw.mit.edu/courses/health-sciences-and-technology",
    "https://ocw.mit.edu/courses/institute-for-data-systems-and-society",
    "https://ocw.mit.edu/courses/materials-science-and-engineering",
    "https://ocw.mit.edu/courses/mechanical-engineering",
    "https://ocw.mit.edu/courses/nuclear-engineering",
    "https://ocw.mit.edu/courses/anthropology",
    "https://ocw.mit.edu/courses/comparative-media-studies-writing",
    "https://ocw.mit.edu/courses/economics",
    "https://ocw.mit.edu/courses/global-languages",
    "https://ocw.mit.edu/courses/history",
    "https://ocw.mit.edu/courses/linguistics-and-philosophy",
    "https://ocw.mit.edu/courses/literature",
    "https://ocw.mit.edu/courses/music-and-theater-arts",
    "https://ocw.mit.edu/courses/political-science",
    "https://ocw.mit.edu/courses/science-technology-and-society",
    "https://ocw.mit.edu/courses/womens-and-gender-studies",
    "https://ocw.mit.edu/courses/biology",
    "https://ocw.mit.edu/courses/brain-and-cognitive-sciences",
    "https://ocw.mit.edu/courses/chemistry",
    "https://ocw.mit.edu/courses/earth-atmospheric-and-planetary-sciences",
    "https://ocw.mit.edu/courses/mathematics",
    "https://ocw.mit.edu/courses/physics",
    "https://ocw.mit.edu/courses/sloan-school-of-management",
    "https://ocw.mit.edu/courses/athletics-physical-education-and-recreation",
    "https://ocw.mit.edu/courses/concourse",
    "https://ocw.mit.edu/courses/edgerton-center",
    "https://ocw.mit.edu/courses/experimental-study-group",
]
# Scrape result: department slug -> {first line of a course entry: rest of
# that entry's text}. Filled in by the loop below.
classes = defaultdict(dict)

# Regex matching a parenthesised group. Not used by the loop below; kept so
# the module-level name stays available.
pattern = r'\((.*?)\)'

# Upper bound (seconds) on each HTTP request. Without a timeout, requests
# waits forever on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 30

# One Session for the whole run so the connection to the shared host is reused
# across the department requests instead of being re-established each time.
with requests.Session() as session:
    for department in departments:
        # The last URL path segment is the department key.
        dep_name = department.rsplit('/', 1)[1]

        try:
            response = session.get(department, timeout=REQUEST_TIMEOUT_SECONDS)
            # Fail here on 4xx/5xx rather than parsing an error page.
            response.raise_for_status()
        except requests.RequestException as exc:
            # One unreachable department should not discard the others.
            print("skipping", dep_name, "- request failed:", exc, file=sys.stderr)
            continue

        html_doc = response.text
        soup = BeautifulSoup(html_doc, 'html.parser')

        # find() returns None when no element matches; guard against that
        # before calling find_all() on the result.
        course_list = soup.find(attrs={"class": 'courseListDiv'})
        if course_list is None:
            print("skipping", dep_name, "- no courseListDiv element found",
                  file=sys.stderr)
            continue

        courses = course_list.find_all(attrs={"class": 'course_title'})
        for course in courses:
            # Split the entry at its first newline. partition() always yields
            # three parts, so an entry without a newline gives an empty second
            # value instead of raising ValueError on unpacking.
            first_line, _, remainder = course.text.strip().partition('\n')
            class_name = first_line.strip()
            time = remainder.strip()
            classes[dep_name][class_name] = time
            print(dep_name, time, class_name)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment