Skip to content

Instantly share code, notes, and snippets.

@seanbreckenridge
Last active June 23, 2022 16:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save seanbreckenridge/44854575b03e7f643b19bf40cf7e21bd to your computer and use it in GitHub Desktop.
Save seanbreckenridge/44854575b03e7f643b19bf40cf7e21bd to your computer and use it in GitHub Desktop.
A script to download all the videos for the Berkeley Computer Science 61A class
# A script to download all the videos from
# https://archive.org/details/ucberkeley-webcast-PL3E89002AA9B9879E?sort=titleSorter
# python3.7
# pip3 install --user selenium youtube-dl
import youtube_dl
from selenium import webdriver
# Module-level handle to the Selenium WebDriver. It starts as None, is assigned
# inside main(), and is kept global so the __main__ guard's `finally` clause can
# quit the browser even when main() raises part-way through.
driver = None
def main():
    """Download every video listed on the archive.org collection page.

    Opens the collection listing in Chrome, collects the link to each item
    page, visits each one, finds its "MPEG4" download link and hands that
    URL to youtube-dl for download.

    Side effects:
        Assigns the module-level ``driver`` so the caller can quit the
        browser afterwards, prints each download URL, and writes the
        downloaded files via youtube-dl.

    Item pages without an "MPEG4" link are reported and skipped instead of
    aborting the whole run.
    """
    global driver

    # Imported locally so this function carries its own dependencies; both
    # modules ship with the selenium package the file already imports.
    from selenium.common.exceptions import NoSuchElementException
    from selenium.webdriver.common.by import By

    table = "https://archive.org/details/ucberkeley-webcast-PL3E89002AA9B9879E?sort=titleSorter"
    driver = webdriver.Chrome()
    driver.get(table)

    # The find_element(s)_by_* helpers were removed in Selenium 4.3; the
    # By-based locator API works on both old and new releases.
    videos = driver.find_elements(By.CSS_SELECTOR, "div.item-ia > div > div > a")

    # Resolve the hrefs up front: the elements become stale as soon as the
    # browser navigates away from the listing page. Anchors without an href
    # are dropped because driver.get(None) would fail.
    hrefs = (video.get_attribute("href") for video in videos)
    links = [href for href in hrefs if href]

    # One downloader instance is enough for all videos.
    with youtube_dl.YoutubeDL() as ydl:
        for link in links:
            driver.get(link)
            try:
                anchor = driver.find_element(By.PARTIAL_LINK_TEXT, "MPEG4")
            except NoSuchElementException:
                print(f"No MPEG4 download link found on {link}, skipping")
                continue
            link_for_video_download = anchor.get_attribute("href")
            print(link_for_video_download)
            ydl.extract_info(link_for_video_download, download=True)
# Script entry point: run the downloader and always shut the browser down.
if __name__ == "__main__":
    try:
        main()
    finally:
        # main() can fail before the browser is created, in which case
        # driver is still None and there is nothing to quit.
        if driver is not None:
            driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment