Skip to content

Instantly share code, notes, and snippets.

@daniellivingston
Last active November 8, 2022 03:33
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save daniellivingston/fb17514e8ad11ed3284c7fc172959a36 to your computer and use it in GitHub Desktop.
Save daniellivingston/fb17514e8ad11ed3284c7fc172959a36 to your computer and use it in GitHub Desktop.
Script used to download course videos for CS-6260 in GATech's OMSCS program.
#!/usr/bin/env python3
import requests
import json
import re
from pathlib import Path
# Directory that receives the downloaded videos
# (default: a `videos/` folder located next to this script).
video_download_directory = Path(__file__).parent.joinpath("videos")
# You should not need to change the URLs below for CS 6260 unless you are getting errors.
# To override these URLs for a different course:
# 1. Open the Module in Canvas/Instructure in your web browser.
# 2. Open Developer Tools and switch to the Elements tab.
# 3. Search for the <iframe ...> block that contains the Kaltura player.
# 4. Copy the URL in the 'src' attribute of the iframe: '<iframe src="url/to/copy/" ...>'.
# 5. Paste that URL into the dictionary below, keyed by the module name.
urls = {
"Module 1": "https://www.kaltura.com/p/346151/sp/34615100/embedIframeJs/uiconf_id/40436601/partner_id/346151/widget_id/1_f6evmb09?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=0_9kac8by8",
"Module 2": "https://cdnapisec.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_bmj7e3nr?iframeembed=true&playerId=kaltura_player_5d9f781472253&flashvars%5BplaylistAPI.kpl0Id%5D=1_ra1z4nlz",
"Module 3": "https://cdnapisec.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_8jdgt9i8?iframeembed=true&playerId=kaltura_player_5d9600130a31f&flashvars%5BplaylistAPI.kpl0Id%5D=1_vq0ai9y3",
"Module 4": "https://cdnapisec.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_20lh06gj?iframeembed=true&playerId=kaltura_player_5dcc254310941&flashvars%5BplaylistAPI.kpl0Id%5D=1_119gk2sn",
"Module 5": "https://cdnapisec.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_6vja8kmm?iframeembed=true&playerId=kaltura_player_5dcc2deae15f2&flashvars%5BplaylistAPI.kpl0Id%5D=1_cu0h184t",
"Module 6": "https://cdnapisec.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_a52lhhci?iframeembed=true&playerId=kaltura_player_5dd5859cb9a31&flashvars%5BplaylistAPI.kpl0Id%5D=1_owrn7s81",
"Module 7": "https://cdnapisec.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_2e1vfxdw?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_6f9xgnab",
"Module 8": "https://cdnapisec.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_furs9e2o?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_33hdc0hj",
"Module 9": "https://cdnapisec.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_3q1f4j2p?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_evup435d",
"Module 10": "https://www.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_y2c2nlmw?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_xwcxwpyg&flashvars%5Bks%5D=&&flashvars%5BimageDefaultDuration%5D=30&flashvars%5BlocalizationCode%5D=en&flashvars%5BleadWithHTML5%5D=true&flashvars%5BforceMobileHTML5%5D=true&flashvars%5BnextPrevBtn.plugin%5D=true&flashvars%5BsideBarContainer.plugin%5D=true&flashvars%5BsideBarContainer.position%5D=left&flashvars%5BsideBarContainer.clickToClose%5D=true&flashvars%5Bchapters.plugin%5D=true&flashvars%5Bchapters.layout%5D=vertical&flashvars%5Bchapters.thumbnailRotator%5D=false&flashvars%5BstreamSelector.plugin%5D=true&flashvars%5BEmbedPlayer.SpinnerTarget%5D=videoHolder&flashvars%5BdualScreen.plugin%5D=true&flashvars%5Bhotspots.plugin%5D=true&flashvars%5BplaylistAPI.playlistUrl%5D=https://mediaspace.gatech.edu/playlist/details/%7BplaylistAPI.kpl0Id%7D",
"Module 11a": "https://cdnapisec.kaltura.com/html5/html5lib/v2.98/mwEmbedFrame.php/p/2019031/uiconf_id/40436601?wid=1_rf6nhdbc&iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_v8a6lpjr&flashvars%5Bks%5D=&&flashvars%5BimageDefaultDuration%5D=30&flashvars%5BlocalizationCode%5D=en&flashvars%5BleadWithHTML5%5D=true&flashvars%5BforceMobileHTML5%5D=true&flashvars%5BnextPrevBtn.plugin%5D=true&flashvars%5BsideBarContainer.plugin%5D=true&flashvars%5BsideBarContainer.position%5D=left&flashvars%5BsideBarContainer.clickToClose%5D=true&flashvars%5Bchapters.plugin%5D=true&flashvars%5Bchapters.layout%5D=vertical&flashvars%5Bchapters.thumbnailRotator%5D=false&flashvars%5BstreamSelector.plugin%5D=true&flashvars%5BEmbedPlayer.SpinnerTarget%5D=videoHolder&flashvars%5BdualScreen.plugin%5D=true&flashvars%5Bhotspots.plugin%5D=true&flashvars%5BplaylistAPI.playlistUrl%5D=https://mediaspace.gatech.edu/playlist/details/%7BplaylistAPI.kpl0Id%7D",
"Module 11b": "https://www.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_ah0flt90?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_6k60w2rd&flashvars%5Bks%5D=&&flashvars%5BimageDefaultDuration%5D=30&flashvars%5BlocalizationCode%5D=en&flashvars%5BleadWithHTML5%5D=true&flashvars%5BforceMobileHTML5%5D=true&flashvars%5BnextPrevBtn.plugin%5D=true&flashvars%5BsideBarContainer.plugin%5D=true&flashvars%5BsideBarContainer.position%5D=left&flashvars%5BsideBarContainer.clickToClose%5D=true&flashvars%5Bchapters.plugin%5D=true&flashvars%5Bchapters.layout%5D=vertical&flashvars%5Bchapters.thumbnailRotator%5D=false&flashvars%5BstreamSelector.plugin%5D=true&flashvars%5BEmbedPlayer.SpinnerTarget%5D=videoHolder&flashvars%5BdualScreen.plugin%5D=true&flashvars%5Bhotspots.plugin%5D=true&flashvars%5BplaylistAPI.playlistUrl%5D=https://mediaspace.gatech.edu/playlist/details/%7BplaylistAPI.kpl0Id%7D",
"Module 12": "https://www.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_cgtf7ezu?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_6qb5erj0&flashvars%5Bks%5D=&&flashvars%5BimageDefaultDuration%5D=30&flashvars%5BlocalizationCode%5D=en&flashvars%5BleadWithHTML5%5D=true&flashvars%5BforceMobileHTML5%5D=true&flashvars%5BnextPrevBtn.plugin%5D=true&flashvars%5BsideBarContainer.plugin%5D=true&flashvars%5BsideBarContainer.position%5D=left&flashvars%5BsideBarContainer.clickToClose%5D=true&flashvars%5Bchapters.plugin%5D=true&flashvars%5Bchapters.layout%5D=vertical&flashvars%5Bchapters.thumbnailRotator%5D=false&flashvars%5BstreamSelector.plugin%5D=true&flashvars%5BEmbedPlayer.SpinnerTarget%5D=videoHolder&flashvars%5BdualScreen.plugin%5D=true&flashvars%5Bhotspots.plugin%5D=true&flashvars%5BplaylistAPI.playlistUrl%5D=https://mediaspace.gatech.edu/playlist/details/%7BplaylistAPI.kpl0Id%7D",
"Module 13": "https://www.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_nag3mygt?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_mca8vfg5&flashvars%5Bks%5D=&&flashvars%5BimageDefaultDuration%5D=30&flashvars%5BlocalizationCode%5D=en&flashvars%5BleadWithHTML5%5D=true&flashvars%5BforceMobileHTML5%5D=true&flashvars%5BnextPrevBtn.plugin%5D=true&flashvars%5BsideBarContainer.plugin%5D=true&flashvars%5BsideBarContainer.position%5D=left&flashvars%5BsideBarContainer.clickToClose%5D=true&flashvars%5Bchapters.plugin%5D=true&flashvars%5Bchapters.layout%5D=vertical&flashvars%5Bchapters.thumbnailRotator%5D=false&flashvars%5BstreamSelector.plugin%5D=true&flashvars%5BEmbedPlayer.SpinnerTarget%5D=videoHolder&flashvars%5BdualScreen.plugin%5D=true&flashvars%5Bhotspots.plugin%5D=true&flashvars%5BplaylistAPI.playlistUrl%5D=https://mediaspace.gatech.edu/playlist/details/%7BplaylistAPI.kpl0Id%7D",
"Module 14": "https://www.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_seec775y?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_ybx2dr71&flashvars%5Bks%5D=&&flashvars%5BimageDefaultDuration%5D=30&flashvars%5BlocalizationCode%5D=en&flashvars%5BleadWithHTML5%5D=true&flashvars%5BforceMobileHTML5%5D=true&flashvars%5BnextPrevBtn.plugin%5D=true&flashvars%5BsideBarContainer.plugin%5D=true&flashvars%5BsideBarContainer.position%5D=left&flashvars%5BsideBarContainer.clickToClose%5D=true&flashvars%5Bchapters.plugin%5D=true&flashvars%5Bchapters.layout%5D=vertical&flashvars%5Bchapters.thumbnailRotator%5D=false&flashvars%5BstreamSelector.plugin%5D=true&flashvars%5BEmbedPlayer.SpinnerTarget%5D=videoHolder&flashvars%5BdualScreen.plugin%5D=true&flashvars%5Bhotspots.plugin%5D=true&flashvars%5BplaylistAPI.playlistUrl%5D=https://mediaspace.gatech.edu/playlist/details/%7BplaylistAPI.kpl0Id%7D",
"Module 15": "https://www.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_efvwfeyt?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_bmetj6gc&flashvars%5Bks%5D=&&flashvars%5BimageDefaultDuration%5D=30&flashvars%5BlocalizationCode%5D=en&flashvars%5BleadWithHTML5%5D=true&flashvars%5BforceMobileHTML5%5D=true&flashvars%5BnextPrevBtn.plugin%5D=true&flashvars%5BsideBarContainer.plugin%5D=true&flashvars%5BsideBarContainer.position%5D=left&flashvars%5BsideBarContainer.clickToClose%5D=true&flashvars%5Bchapters.plugin%5D=true&flashvars%5Bchapters.layout%5D=vertical&flashvars%5Bchapters.thumbnailRotator%5D=false&flashvars%5BstreamSelector.plugin%5D=true&flashvars%5BEmbedPlayer.SpinnerTarget%5D=videoHolder&flashvars%5BdualScreen.plugin%5D=true&flashvars%5Bhotspots.plugin%5D=true&flashvars%5BplaylistAPI.playlistUrl%5D=https://mediaspace.gatech.edu/playlist/details/%7BplaylistAPI.kpl0Id%7D",
"Module 16": "https://www.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_1ksqi1zq?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_y2xesv8r&flashvars%5Bks%5D=&&flashvars%5BimageDefaultDuration%5D=30&flashvars%5BlocalizationCode%5D=en&flashvars%5BleadWithHTML5%5D=true&flashvars%5BforceMobileHTML5%5D=true&flashvars%5BnextPrevBtn.plugin%5D=true&flashvars%5BsideBarContainer.plugin%5D=true&flashvars%5BsideBarContainer.position%5D=left&flashvars%5BsideBarContainer.clickToClose%5D=true&flashvars%5Bchapters.plugin%5D=true&flashvars%5Bchapters.layout%5D=vertical&flashvars%5Bchapters.thumbnailRotator%5D=false&flashvars%5BstreamSelector.plugin%5D=true&flashvars%5BEmbedPlayer.SpinnerTarget%5D=videoHolder&flashvars%5BdualScreen.plugin%5D=true&flashvars%5Bhotspots.plugin%5D=true&flashvars%5BplaylistAPI.playlistUrl%5D=https://mediaspace.gatech.edu/playlist/details/%7BplaylistAPI.kpl0Id%7D",
"Module 17": "https://www.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_6nf5buc4?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_r07ddnsj&flashvars%5Bks%5D=&&flashvars%5BimageDefaultDuration%5D=30&flashvars%5BlocalizationCode%5D=en&flashvars%5BleadWithHTML5%5D=true&flashvars%5BforceMobileHTML5%5D=true&flashvars%5BnextPrevBtn.plugin%5D=true&flashvars%5BsideBarContainer.plugin%5D=true&flashvars%5BsideBarContainer.position%5D=left&flashvars%5BsideBarContainer.clickToClose%5D=true&flashvars%5Bchapters.plugin%5D=true&flashvars%5Bchapters.layout%5D=vertical&flashvars%5Bchapters.thumbnailRotator%5D=false&flashvars%5BstreamSelector.plugin%5D=true&flashvars%5BEmbedPlayer.SpinnerTarget%5D=videoHolder&flashvars%5BdualScreen.plugin%5D=true&flashvars%5Bhotspots.plugin%5D=true&flashvars%5BplaylistAPI.playlistUrl%5D=https://mediaspace.gatech.edu/playlist/details/%7BplaylistAPI.kpl0Id%7D",
"Module 18": "https://www.kaltura.com/p/2019031/sp/201903100/embedIframeJs/uiconf_id/40436601/partner_id/2019031/widget_id/1_0cqhitgy?iframeembed=true&playerId=kaltura_player_&flashvars%5BplaylistAPI.kpl0Id%5D=1_zphe2pv3&flashvars%5Bks%5D=&&flashvars%5BimageDefaultDuration%5D=30&flashvars%5BlocalizationCode%5D=en&flashvars%5BleadWithHTML5%5D=true&flashvars%5BforceMobileHTML5%5D=true&flashvars%5BnextPrevBtn.plugin%5D=true&flashvars%5BsideBarContainer.plugin%5D=true&flashvars%5BsideBarContainer.position%5D=left&flashvars%5BsideBarContainer.clickToClose%5D=true&flashvars%5Bchapters.plugin%5D=true&flashvars%5Bchapters.layout%5D=vertical&flashvars%5Bchapters.thumbnailRotator%5D=false&flashvars%5BstreamSelector.plugin%5D=true&flashvars%5BEmbedPlayer.SpinnerTarget%5D=videoHolder&flashvars%5BdualScreen.plugin%5D=true&flashvars%5Bhotspots.plugin%5D=true&flashvars%5BplaylistAPI.playlistUrl%5D=https://mediaspace.gatech.edu/playlist/details/%7BplaylistAPI.kpl0Id%7D"
}
def get_url_contents(url: str, timeout: float = 30.0) -> str:
    """Download *url* and return the response body decoded as UTF-8.

    Args:
        url: Address to fetch with an HTTP GET request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can wait indefinitely on a silent host.

    Returns:
        The response body decoded as a UTF-8 string.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    print(f"--> Downloading '{url}'")
    response = requests.get(url, timeout=timeout)
    # Fail here instead of handing an HTTP error page to the HTML parser.
    response.raise_for_status()
    return response.content.decode('utf-8')
def get_kaltura_playlist_metadata(html: str) -> dict:
    """Extract the Kaltura player metadata embedded in a page's HTML.

    The HTML is searched for a ``window.kalturaIframePackageData = ...;``
    assignment, and the assigned text is decoded as JSON.

    Args:
        html: Source of the Kaltura player iframe page.

    Returns:
        The decoded JSON value assigned to ``window.kalturaIframePackageData``.

    Raises:
        AttributeError: If the assignment is not present in *html*. The
            offending HTML is written to ``error.log`` in the current working
            directory before raising, to help with debugging.
        json.JSONDecodeError: If the assigned text is not valid JSON.
    """
    match = re.search(r"(\s+window\.kalturaIframePackageData\s=\s+)(.+);", html)
    if match is None:
        # Check for "no match" explicitly: indexing an empty result list would
        # raise IndexError and skip this diagnostic dump altogether.
        with open('error.log', 'w', encoding='utf-8') as f:
            f.write(html)
        raise AttributeError("Failed to parse HTML: could not find `window.kalturaIframePackageData`.")
    # Group 2 is the text between the `=` and the last `;` on that line.
    return json.loads(match.group(2))
def idk(x):
    """Reduce the Kaltura metadata dict to the fields needed for downloading.

    Args:
        x: Decoded Kaltura metadata. It must have a ``'playlistResult'``
            mapping whose values each carry a ``'name'`` and a list of
            ``'items'``.

    Returns:
        A dict mapping each playlist name to a list of
        ``{"name", "url", "thumbnail"}`` dicts, one per playlist item, taken
        from the item's ``'name'``, ``'downloadUrl'`` and ``'thumbnailUrl'``.
    """
    def _summarize(entry):
        # Keep only the three fields used by the rest of the script.
        return {
            "name": entry['name'],
            "url": entry['downloadUrl'],
            "thumbnail": entry['thumbnailUrl'],
        }

    extracted: dict = {}
    for playlist in x['playlistResult'].values():
        extracted[playlist['name']] = [_summarize(entry) for entry in playlist['items']]
    return extracted
def download_video(url: str, filename: str, chunk_size: int = 8192) -> None:
    """Stream a video from *url* into the local file *filename*.

    The response is read in chunks so the whole video is never held in memory.
    Progress is printed on a single console line while downloading.

    Args:
        url: Address of the video to download; redirects are followed.
        filename: Path of the file to write (opened in binary write mode).
        chunk_size: Maximum number of bytes requested per chunk.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        OSError: If *filename* cannot be opened or written.
    """
    print(f"Downloading '{url}' to '{filename}'...")
    with requests.get(url, stream=True, allow_redirects=True) as r:
        r.raise_for_status()
        # 0 means the server did not announce a size (no Content-Length header).
        file_size = int(r.headers.get('Content-Length', 0))
        bytes_read = 0
        with open(filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=chunk_size):
                f.write(chunk)
                # Count what was actually received; chunks may be shorter
                # than chunk_size (the last one usually is).
                bytes_read += len(chunk)
                if file_size > 0:
                    # Clamp: the announced size and the number of bytes
                    # yielded can differ (e.g. for compressed responses).
                    progress = min(100, int(100. * bytes_read / file_size))
                    print(f"\r{progress}%..." + " "*10, end='')
                else:
                    # Unknown total size: a percentage cannot be computed,
                    # so report the running byte count instead.
                    print(f"\r{bytes_read} bytes..." + " "*10, end='')
    print("\rCompleted.")
def download_videos(metadata: dict):
    """Download every video listed in *metadata* into per-module folders.

    Args:
        metadata: Maps a module name to a list of video dicts, each providing
            at least ``'name'`` and ``'url'``.

    One directory per module is created under ``video_download_directory``.
    Module and video names are sanitized before being used as path
    components. A failed download is reported on stdout and does not stop
    the remaining downloads.
    """
    def sanitize(filename):
        # Replace everything except word characters, '-', '_', '.' and spaces.
        return re.sub(r'[^\w\-_\. ]', '_', filename)

    for module_key, videos in metadata.items():
        module_dir = video_download_directory / sanitize(module_key)
        module_dir.mkdir(parents=True, exist_ok=True)
        for video in videos:
            target = module_dir / sanitize(video['name'] + '.mp4')
            source = video['url']
            try:
                download_video(str(source), str(target))
            except Exception as error:
                # Best effort: report the failure and move on to the next video.
                print(f"ERROR: failed to download {video['name']} ({error=})")
if __name__ == '__main__':
    # Gather the video metadata of every configured module into one dict.
    results = {}
    for module, module_url in urls.items():
        print(f"Processing {module}...")
        page = get_url_contents(module_url)
        results.update(idk(get_kaltura_playlist_metadata(page)))

    # Optional: keep a copy of the collected module/video metadata on disk.
    serialized = json.dumps(results)
    with open('results.json', 'w') as f:
        f.write(serialized)

    print(results)
    download_videos(results)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment