Last active
November 8, 2022 03:33
-
-
Save daniellivingston/fb17514e8ad11ed3284c7fc172959a36 to your computer and use it in GitHub Desktop.
Script used to download course videos for CS-6260 in GATech's OMSCS program.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import requests | |
import json | |
import re | |
from pathlib import Path | |
# Download directory. Defaults to a `videos/` folder located next to this
# script file (resolved from `__file__`, not from the current working directory).
video_download_directory = Path(__file__).parent / "videos"
# You do not need to change the URLs below for CS 6260 unless you are getting errors.
# To override these URLs for a different course:
# 1. Open the Module in Canvas/Instructure in your web browser.
# 2. Open Developer Tools and switch to the Elements tab.
# 3. Search for the <iframe ...> element that contains the Kaltura player.
# 4. Copy the URL in the 'src' attribute of the iframe: '<iframe src="url/to/copy/" ...>'.
# 5. Paste that URL as a value in the `urls` dict below.
urls = { | |
"Module 1": "https://www.kaltura.com/p/346151/sp/34615100/embedIframeJs/uiconf_id/40436601/partner_id/346151/widget_id/1_f6evmb09?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=0_9kac8by8", | |
"Module 2": "https://cdnapisec.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_bmj7e3nr?iframeembed=true&playerId=kaltura_player_5d9f781472253&flashvars%5BplaylistAPI.kpl0Id%5D=1_ra1z4nlz", | |
"Module 3": "https://cdnapisec.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_8jdgt9i8?iframeembed=true&playerId=kaltura_player_5d9600130a31f&flashvars%5BplaylistAPI.kpl0Id%5D=1_vq0ai9y3", | |
"Module 4": "https://cdnapisec.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_20lh06gj?iframeembed=true&playerId=kaltura_player_5dcc254310941&flashvars%5BplaylistAPI.kpl0Id%5D=1_119gk2sn", | |
"Module 5": "https://cdnapisec.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_6vja8kmm?iframeembed=true&playerId=kaltura_player_5dcc2deae15f2&flashvars%5BplaylistAPI.kpl0Id%5D=1_cu0h184t", | |
"Module 6": "https://cdnapisec.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_a52lhhci?iframeembed=true&playerId=kaltura_player_5dd5859cb9a31&flashvars%5BplaylistAPI.kpl0Id%5D=1_owrn7s81", | |
"Module 7": "https://cdnapisec.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_2e1vfxdw?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_6f9xgnab", | |
"Module 8": "https://cdnapisec.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_furs9e2o?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_33hdc0hj", | |
"Module 9": "https://cdnapisec.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_3q1f4j2p?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_evup435d", | |
"Module 10": "https://www.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_y2c2nlmw?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_xwcxwpyg&flashvars%5Bks%5D=&&flashvars%5BimageDefaultDuration%5D=30&flashvars%5BlocalizationCode%5D=en&flashvars%5BleadWithHTML5%5D=true&flashvars%5BforceMobileHTML5%5D=true&flashvars%5BnextPrevBtn.plugin%5D=true&flashvars%5BsideBarContainer.plugin%5D=true&flashvars%5BsideBarContainer.position%5D=left&flashvars%5BsideBarContainer.clickToClose%5D=true&flashvars%5Bchapters.plugin%5D=true&flashvars%5Bchapters.layout%5D=vertical&flashvars%5Bchapters.thumbnailRotator%5D=false&flashvars%5BstreamSelector.plugin%5D=true&flashvars%5BEmbedPlayer.SpinnerTarget%5D=videoHolder&flashvars%5BdualScreen.plugin%5D=true&flashvars%5Bhotspots.plugin%5D=true&flashvars%5BplaylistAPI.playlistUrl%5D=https://mediaspace.gatech.edu/playlist/details/%7BplaylistAPI.kpl0Id%7D", | |
"Module 11a": "https://cdnapisec.kaltura.com/html5/html5lib/v2.98/mwEmbedFrame.php/p/2019031/uiconf_id/40436601?wid=1_rf6nhdbc&iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_v8a6lpjr&flashvars%5Bks%5D=&&flashvars%5BimageDefaultDuration%5D=30&flashvars%5BlocalizationCode%5D=en&flashvars%5BleadWithHTML5%5D=true&flashvars%5BforceMobileHTML5%5D=true&flashvars%5BnextPrevBtn.plugin%5D=true&flashvars%5BsideBarContainer.plugin%5D=true&flashvars%5BsideBarContainer.position%5D=left&flashvars%5BsideBarContainer.clickToClose%5D=true&flashvars%5Bchapters.plugin%5D=true&flashvars%5Bchapters.layout%5D=vertical&flashvars%5Bchapters.thumbnailRotator%5D=false&flashvars%5BstreamSelector.plugin%5D=true&flashvars%5BEmbedPlayer.SpinnerTarget%5D=videoHolder&flashvars%5BdualScreen.plugin%5D=true&flashvars%5Bhotspots.plugin%5D=true&flashvars%5BplaylistAPI.playlistUrl%5D=https://mediaspace.gatech.edu/playlist/details/%7BplaylistAPI.kpl0Id%7D", | |
"Module 11b": "https://www.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_ah0flt90?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_6k60w2rd&flashvars%5Bks%5D=&&flashvars%5BimageDefaultDuration%5D=30&flashvars%5BlocalizationCode%5D=en&flashvars%5BleadWithHTML5%5D=true&flashvars%5BforceMobileHTML5%5D=true&flashvars%5BnextPrevBtn.plugin%5D=true&flashvars%5BsideBarContainer.plugin%5D=true&flashvars%5BsideBarContainer.position%5D=left&flashvars%5BsideBarContainer.clickToClose%5D=true&flashvars%5Bchapters.plugin%5D=true&flashvars%5Bchapters.layout%5D=vertical&flashvars%5Bchapters.thumbnailRotator%5D=false&flashvars%5BstreamSelector.plugin%5D=true&flashvars%5BEmbedPlayer.SpinnerTarget%5D=videoHolder&flashvars%5BdualScreen.plugin%5D=true&flashvars%5Bhotspots.plugin%5D=true&flashvars%5BplaylistAPI.playlistUrl%5D=https://mediaspace.gatech.edu/playlist/details/%7BplaylistAPI.kpl0Id%7D", | |
"Module 12": "https://www.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_cgtf7ezu?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_6qb5erj0&flashvars%5Bks%5D=&&flashvars%5BimageDefaultDuration%5D=30&flashvars%5BlocalizationCode%5D=en&flashvars%5BleadWithHTML5%5D=true&flashvars%5BforceMobileHTML5%5D=true&flashvars%5BnextPrevBtn.plugin%5D=true&flashvars%5BsideBarContainer.plugin%5D=true&flashvars%5BsideBarContainer.position%5D=left&flashvars%5BsideBarContainer.clickToClose%5D=true&flashvars%5Bchapters.plugin%5D=true&flashvars%5Bchapters.layout%5D=vertical&flashvars%5Bchapters.thumbnailRotator%5D=false&flashvars%5BstreamSelector.plugin%5D=true&flashvars%5BEmbedPlayer.SpinnerTarget%5D=videoHolder&flashvars%5BdualScreen.plugin%5D=true&flashvars%5Bhotspots.plugin%5D=true&flashvars%5BplaylistAPI.playlistUrl%5D=https://mediaspace.gatech.edu/playlist/details/%7BplaylistAPI.kpl0Id%7D", | |
"Module 13": "https://www.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_nag3mygt?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_mca8vfg5&flashvars%5Bks%5D=&&flashvars%5BimageDefaultDuration%5D=30&flashvars%5BlocalizationCode%5D=en&flashvars%5BleadWithHTML5%5D=true&flashvars%5BforceMobileHTML5%5D=true&flashvars%5BnextPrevBtn.plugin%5D=true&flashvars%5BsideBarContainer.plugin%5D=true&flashvars%5BsideBarContainer.position%5D=left&flashvars%5BsideBarContainer.clickToClose%5D=true&flashvars%5Bchapters.plugin%5D=true&flashvars%5Bchapters.layout%5D=vertical&flashvars%5Bchapters.thumbnailRotator%5D=false&flashvars%5BstreamSelector.plugin%5D=true&flashvars%5BEmbedPlayer.SpinnerTarget%5D=videoHolder&flashvars%5BdualScreen.plugin%5D=true&flashvars%5Bhotspots.plugin%5D=true&flashvars%5BplaylistAPI.playlistUrl%5D=https://mediaspace.gatech.edu/playlist/details/%7BplaylistAPI.kpl0Id%7D", | |
"Module 14": "https://www.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_seec775y?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_ybx2dr71&flashvars%5Bks%5D=&&flashvars%5BimageDefaultDuration%5D=30&flashvars%5BlocalizationCode%5D=en&flashvars%5BleadWithHTML5%5D=true&flashvars%5BforceMobileHTML5%5D=true&flashvars%5BnextPrevBtn.plugin%5D=true&flashvars%5BsideBarContainer.plugin%5D=true&flashvars%5BsideBarContainer.position%5D=left&flashvars%5BsideBarContainer.clickToClose%5D=true&flashvars%5Bchapters.plugin%5D=true&flashvars%5Bchapters.layout%5D=vertical&flashvars%5Bchapters.thumbnailRotator%5D=false&flashvars%5BstreamSelector.plugin%5D=true&flashvars%5BEmbedPlayer.SpinnerTarget%5D=videoHolder&flashvars%5BdualScreen.plugin%5D=true&flashvars%5Bhotspots.plugin%5D=true&flashvars%5BplaylistAPI.playlistUrl%5D=https://mediaspace.gatech.edu/playlist/details/%7BplaylistAPI.kpl0Id%7D", | |
"Module 15": "https://www.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_efvwfeyt?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_bmetj6gc&flashvars%5Bks%5D=&&flashvars%5BimageDefaultDuration%5D=30&flashvars%5BlocalizationCode%5D=en&flashvars%5BleadWithHTML5%5D=true&flashvars%5BforceMobileHTML5%5D=true&flashvars%5BnextPrevBtn.plugin%5D=true&flashvars%5BsideBarContainer.plugin%5D=true&flashvars%5BsideBarContainer.position%5D=left&flashvars%5BsideBarContainer.clickToClose%5D=true&flashvars%5Bchapters.plugin%5D=true&flashvars%5Bchapters.layout%5D=vertical&flashvars%5Bchapters.thumbnailRotator%5D=false&flashvars%5BstreamSelector.plugin%5D=true&flashvars%5BEmbedPlayer.SpinnerTarget%5D=videoHolder&flashvars%5BdualScreen.plugin%5D=true&flashvars%5Bhotspots.plugin%5D=true&flashvars%5BplaylistAPI.playlistUrl%5D=https://mediaspace.gatech.edu/playlist/details/%7BplaylistAPI.kpl0Id%7D", | |
"Module 16": "https://www.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_1ksqi1zq?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_y2xesv8r&flashvars%5Bks%5D=&&flashvars%5BimageDefaultDuration%5D=30&flashvars%5BlocalizationCode%5D=en&flashvars%5BleadWithHTML5%5D=true&flashvars%5BforceMobileHTML5%5D=true&flashvars%5BnextPrevBtn.plugin%5D=true&flashvars%5BsideBarContainer.plugin%5D=true&flashvars%5BsideBarContainer.position%5D=left&flashvars%5BsideBarContainer.clickToClose%5D=true&flashvars%5Bchapters.plugin%5D=true&flashvars%5Bchapters.layout%5D=vertical&flashvars%5Bchapters.thumbnailRotator%5D=false&flashvars%5BstreamSelector.plugin%5D=true&flashvars%5BEmbedPlayer.SpinnerTarget%5D=videoHolder&flashvars%5BdualScreen.plugin%5D=true&flashvars%5Bhotspots.plugin%5D=true&flashvars%5BplaylistAPI.playlistUrl%5D=https://mediaspace.gatech.edu/playlist/details/%7BplaylistAPI.kpl0Id%7D", | |
"Module 17": "https://www.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_6nf5buc4?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_r07ddnsj&flashvars%5Bks%5D=&&flashvars%5BimageDefaultDuration%5D=30&flashvars%5BlocalizationCode%5D=en&flashvars%5BleadWithHTML5%5D=true&flashvars%5BforceMobileHTML5%5D=true&flashvars%5BnextPrevBtn.plugin%5D=true&flashvars%5BsideBarContainer.plugin%5D=true&flashvars%5BsideBarContainer.position%5D=left&flashvars%5BsideBarContainer.clickToClose%5D=true&flashvars%5Bchapters.plugin%5D=true&flashvars%5Bchapters.layout%5D=vertical&flashvars%5Bchapters.thumbnailRotator%5D=false&flashvars%5BstreamSelector.plugin%5D=true&flashvars%5BEmbedPlayer.SpinnerTarget%5D=videoHolder&flashvars%5BdualScreen.plugin%5D=true&flashvars%5Bhotspots.plugin%5D=true&flashvars%5BplaylistAPI.playlistUrl%5D=https://mediaspace.gatech.edu/playlist/details/%7BplaylistAPI.kpl0Id%7D", | |
"Module 18": "https://www.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_0cqhitgy?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_zphe2pv3&flashvars%5Bks%5D=&&flashvars%5BimageDefaultDuration%5D=30&flashvars%5BlocalizationCode%5D=en&flashvars%5BleadWithHTML5%5D=true&flashvars%5BforceMobileHTML5%5D=true&flashvars%5BnextPrevBtn.plugin%5D=true&flashvars%5BsideBarContainer.plugin%5D=true&flashvars%5BsideBarContainer.position%5D=left&flashvars%5BsideBarContainer.clickToClose%5D=true&flashvars%5Bchapters.plugin%5D=true&flashvars%5Bchapters.layout%5D=vertical&flashvars%5Bchapters.thumbnailRotator%5D=false&flashvars%5BstreamSelector.plugin%5D=true&flashvars%5BEmbedPlayer.SpinnerTarget%5D=videoHolder&flashvars%5BdualScreen.plugin%5D=true&flashvars%5Bhotspots.plugin%5D=true&flashvars%5BplaylistAPI.playlistUrl%5D=https://mediaspace.gatech.edu/playlist/details/%7BplaylistAPI.kpl0Id%7D" | |
} | |
def get_url_contents(url: str, timeout: float = 30.0) -> str:
    """Download ``url`` with an HTTP GET and return its body decoded as UTF-8.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait for the server (connect and each read)
            before giving up, so a stalled connection cannot hang the script.

    Returns:
        The response body as a string.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    print(f"--> Downloading '{url}'")
    response = requests.get(url, timeout=timeout)
    # Fail here instead of handing an error page to the HTML parser.
    response.raise_for_status()
    return response.content.decode('utf-8')
def get_kaltura_playlist_metadata(html: str) -> dict:
    """Extract the Kaltura player metadata embedded in an HTML page.

    Searches ``html`` for a ``window.kalturaIframePackageData = <json>;``
    assignment and parses the JSON text of the first occurrence.

    Args:
        html: Source of the page to search.

    Returns:
        The decoded JSON value assigned to ``window.kalturaIframePackageData``.

    Raises:
        AttributeError: If the assignment is not present in ``html``. The
            full HTML is written to ``error.log`` first to aid debugging.
        json.JSONDecodeError: If the captured text is not valid JSON.
    """
    matches = re.findall(r"(\s+window\.kalturaIframePackageData\s=\s+)(.+);", html)
    if not matches:
        # re.findall returns an empty list when nothing matches, so indexing
        # it would raise IndexError; check explicitly so the diagnostic dump
        # and the descriptive error are actually produced.
        with open('error.log', 'w', encoding='utf-8') as f:
            f.write(html)
        raise AttributeError("Failed to parse HTML: could not find `window.kalturaIframePackageData`.")
    # Each match is a (prefix, payload) tuple; the payload is the JSON text.
    return json.loads(matches[0][1])
def idk(x: dict) -> dict:
    """Pull the wanted per-video fields out of the parsed Kaltura metadata.

    Args:
        x: Parsed metadata. Must contain a ``'playlistResult'`` mapping whose
            values each have a ``'name'`` and an ``'items'`` list; every item
            must have ``'name'``, ``'downloadUrl'`` and ``'thumbnailUrl'``.

    Returns:
        A dict mapping each playlist name to a list of
        ``{"name", "url", "thumbnail"}`` dicts, one per item, in order.
        Playlists sharing a name overwrite each other (last one wins).

    Raises:
        KeyError: If any of the keys listed above is missing.
    """
    results: dict = {}
    # Only the playlist objects are needed, not their ids.
    for playlist in x['playlistResult'].values():
        results[playlist['name']] = [
            {
                "name": item['name'],
                "url": item['downloadUrl'],
                "thumbnail": item['thumbnailUrl'],
            }
            for item in playlist['items']
        ]
    return results
def download_video(url: str, filename: str, chunk_size: int = 8192, timeout: float = 60.0) -> None:
    """Download a video to local storage, streaming it in chunks.

    The response is written to disk chunk by chunk so the whole file is never
    held in memory. Progress is printed on a single, continuously rewritten
    console line.

    Args:
        url: Address of the video; redirects are followed.
        filename: Path of the file to create (overwritten if it exists).
        chunk_size: Maximum number of bytes requested per chunk.
        timeout: Seconds to wait for the server (connect and each read)
            before giving up, so a stalled transfer cannot hang the script.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
        OSError: If ``filename`` cannot be opened or written.
    """
    print(f"Downloading '{url}' to '{filename}'...")
    with requests.get(url, stream=True, allow_redirects=True, timeout=timeout) as r:
        r.raise_for_status()
        # Content-Length is optional; 0 here means the total size is unknown.
        file_size = int(r.headers.get('Content-Length', 0))
        bytes_read = 0
        with open(filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=chunk_size):
                f.write(chunk)
                # Count what was actually received: chunks (the last one in
                # particular) may be shorter than chunk_size.
                bytes_read += len(chunk)
                if file_size > 0:
                    # Clamp in case more bytes arrive than the header announced.
                    progress = min(100, int(100. * bytes_read / file_size))
                    print(f"\r{progress}%..." + " "*10, end='')
                else:
                    # No percentage is possible without a known total size.
                    print(f"\r{bytes_read} bytes..." + " "*10, end='')
    print("\rCompleted.")
def download_videos(metadata: dict):
    """Download every video listed in ``metadata`` into per-module folders.

    ``metadata`` maps a module name to a list of dicts with ``'name'`` and
    ``'url'`` keys. Each module gets its own directory under
    ``video_download_directory``; each video is saved there as
    ``<name>.mp4``. Module and file names are sanitized first. A failed
    download is reported on stdout and does not stop the remaining ones.
    """
    def sanitize(filename):
        # Replace anything other than word characters, '-', '_', '.' and
        # spaces with an underscore.
        return re.sub(r'[^\w\-_\. ]', '_', filename)

    for module_key, videos in metadata.items():
        module_dir = video_download_directory / sanitize(module_key)
        module_dir.mkdir(parents=True, exist_ok=True)

        for video in videos:
            target = module_dir / sanitize(video['name'] + '.mp4')
            source = video['url']
            try:
                download_video(str(source), str(target))
            except Exception as error:
                # Best effort: report the failure and move on to the next video.
                print(f"ERROR: failed to download {video['name']} ({error=})")
if __name__ == '__main__':
    # Collect the video metadata of every configured module into one dict
    # keyed by playlist name.
    results = {}
    for module, module_url in urls.items():
        print(f"Processing {module}...")
        page = get_url_contents(module_url)
        playlist_data = get_kaltura_playlist_metadata(page)
        results.update(idk(playlist_data))

    # Delete this if you want - just writes module/video metadata to disk
    with open('results.json', 'w') as f:
        serialized = json.dumps(results)
        f.write(serialized)

    print(results)
    download_videos(results)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment