Skip to content

Instantly share code, notes, and snippets.

@vinovator
Last active July 28, 2023 05:03
Show Gist options
  • Save vinovator/0f5d0df1e1a863a93568 to your computer and use it in GitHub Desktop.
Save vinovator/0f5d0df1e1a863a93568 to your computer and use it in GitHub Desktop.
Python program to download course content for multiple Udacity courses neatly arranged within a folder structure
# UdacityDownload.py
# Python 2.7.6
"""
Python script to download course content from Udacity courses.
- Creates one folder per course, named after the course
- Downloads all the zip files listed on the course's downloads page
- Extracts the content of each zip file
- Finally deletes the zip file
Multiple courses can be downloaded by listing their ids in course_lst.
"""
import requests
from BeautifulSoup import BeautifulSoup
import zipfile
import os
# Root folder that receives one sub-folder per downloaded course.
# The path is relative: each "../" climbs one directory, so this starts
# two levels above the current working directory.
base_dir = "../../Vinoth/MOOC/Udacity/"

# Udacity ids of the courses to fetch. Edit this list to choose which
# courses are downloaded, e.g. ["ud617", "ud359"].
course_lst = [
    "ud675",
    "ud741",
    "ud820",
]

# Filled in from the catalog page: course id -> course name.
courses = {}
# The catalog page lists every course that offers downloadable content.
# It is used to look up the course name that belongs to each requested id.
catalog_url = "https://www.udacity.com/wiki/downloads"
catalog_resp = requests.get(catalog_url)
# Fail loudly on an HTTP error instead of parsing an error page, which would
# silently leave `courses` empty and make the script do nothing.
catalog_resp.raise_for_status()
catalog_soup = BeautifulSoup(catalog_resp.content)

# Lower-case the requested ids once, so every link can be matched
# case-insensitively without rebuilding the collection per link.
wanted_ids = {course.lower() for course in course_lst}

lis = catalog_soup.findAll("li")
for li in lis:
    for a in li("a"):
        link_text = a.getText()
        if link_text.lower() in wanted_ids:
            # Key: the course id exactly as written in the link text.
            # Value: the full text of the list item, used as the course name.
            courses[link_text] = li.getText()
# For every selected course: create its folder, then download, extract and
# finally delete each zip archive linked from the course's downloads page.
for course_id in courses:
    course_name = courses[course_id]
    print("***Downloading {}".format(course_name))

    # Folder for this course: base_dir + course name with ":" removed.
    download_dir = os.path.join(base_dir, course_name.replace(":", ""))
    print(download_dir)
    if not os.path.exists(download_dir):
        os.makedirs(download_dir)

    # Each course has its own downloads page, e.g. .../wiki/ud359/downloads
    # (full download catalog: https://www.udacity.com/wiki/downloads).
    course_url = "https://www.udacity.com/wiki/" + course_id + "/downloads"
    print("Opening url {}".format(course_url))
    resp = requests.get(course_url)
    soup = BeautifulSoup(resp.content)

    # Download links are within <li> tags.
    for li in soup.findAll("li"):
        for a in li("a"):
            link = a.get("href")  # None when the <a> tag has no href
            # Only zip archives are of interest; skip everything else.
            if not link or not link.endswith(".zip"):
                continue

            name = a.getText()  # link text doubles as the local file name
            print("{}: {}".format(name, link))
            zip_path = os.path.join(download_dir, name)

            # stream=True defers fetching the body, so iter_content() really
            # reads it chunk by chunk instead of holding the whole archive
            # in memory first.
            try:
                vresp = requests.get(link, stream=True)
                try:
                    # Do not save an HTTP error page as if it were a zip file.
                    vresp.raise_for_status()
                    with open(zip_path, "wb") as video:
                        for chunk in vresp.iter_content(chunk_size=1024):
                            if chunk:
                                video.write(chunk)
                finally:
                    # Streamed responses keep the connection until closed.
                    vresp.close()
            except requests.RequestException as e:
                # Best effort: report the failed download and try the next link.
                print(e)
                continue
            print("Video zip files downloaded in {}".format(zip_path))

            # Extract into a folder named after the archive. splitext() drops
            # only the final extension, so dots inside the name are preserved
            # and differently named archives do not share a folder.
            extract_dir = os.path.join(download_dir, os.path.splitext(name)[0])
            try:
                with zipfile.ZipFile(zip_path, "r") as zfile:
                    zfile.extractall(extract_dir)
                print("Zip file extracted")
            except Exception as e:
                # Best effort: report the problem, leave the archive on disk
                # and move on to the next link.
                print(e)
                continue

            # After a successful extraction the original zip file is removed.
            try:
                os.remove(zip_path)
                print("Zip file deleted")
            except OSError as e:
                print(e)
@vinovator
Copy link
Author

The script doesn't seem to work anymore. The structure of the Udacity download site has changed. #TODO - rewrite the script to match the new structure.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment