Skip to content

Instantly share code, notes, and snippets.

@ongyx
Last active May 16, 2023 07:56
Show Gist options
  • Save ongyx/dcc79263d8a6d67fae19ce15f69a247d to your computer and use it in GitHub Desktop.
Save ongyx/dcc79263d8a6d67fae19ce15f69a247d to your computer and use it in GitHub Desktop.
Brightspace Extractor

bsx

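Short script that extracts the files in a Brightspace zip export under their actual filenames, which it reads from the links in the export's HTML index page, rather than under the names the files are stored with inside the archive.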

Prerequisites

bsx requires at least Python 3.9, plus Beautiful Soup 4 and html5lib (the HTML parser the script asks Beautiful Soup to use). It should work on any of Windows/Linux/macOS, although I've only tested it on Windows.

pip install beautifulsoup4 html5lib

Usage

python bsx.py <path to brightspace zip> <dest>
"""bsx: brightspace extractor"""
import pathlib
import sys
import zipfile
from urllib.parse import urlparse, unquote
from bs4 import BeautifulSoup
def find_html(zip: zipfile.ZipFile) -> str:
    """Return the name of the first archive member ending in ``.html``.

    Members are examined in the order reported by ``zip.namelist()``.

    Raises:
        ValueError: If no member name ends with ``.html``.
    """
    # Only the first match matters, so stop scanning as soon as one is found.
    first_match = next(
        (member for member in zip.namelist() if member.endswith(".html")),
        None,
    )
    if first_match is None:
        raise ValueError("HTML file not found")
    return first_match
def parse(zip: zipfile.ZipFile) -> dict[str, str]:
    """Map archive member paths to the filenames shown in the index page.

    The first HTML member of the archive (see ``find_html``) is parsed and
    every ``<a>`` element that has both a usable ``href`` and a single text
    child contributes one entry: the URL-decoded path component of the
    ``href`` is the key and the link text is the value.

    Returns:
        A dict of ``{path inside the archive: display filename}``.

    Raises:
        ValueError: Propagated from ``find_html`` when the archive contains
            no HTML file.
    """
    # Imported locally: only the parser fallback below needs it.
    from bs4 import FeatureNotFound

    with zip.open(find_html(zip)) as f:
        # Read once so the markup can be handed to a second parser if needed.
        markup = f.read()

    try:
        soup = BeautifulSoup(markup, features="html5lib")
    except FeatureNotFound:
        # html5lib is a separate package that may not be installed; the
        # standard-library-backed parser is always available.
        soup = BeautifulSoup(markup, features="html.parser")

    files: dict[str, str] = {}
    for link in soup.find_all("a"):
        href = link.get("href")
        name = link.string
        # Anchors without a target or without plain text cannot name a file.
        if not href or name is None:
            continue
        # map XID to actual filename
        path = unquote(urlparse(href).path)
        if not path:
            # e.g. a fragment-only link such as "#top": nothing to extract.
            continue
        # Store a plain str rather than bs4's NavigableString subclass.
        files[path] = str(name)
    return files
def extract(zip: zipfile.ZipFile, dest: pathlib.Path):
    """Extract every file listed in the archive's index page into ``dest``.

    For each ``path -> name`` pair returned by ``parse``, the archive member
    stored at ``path`` is written below ``dest`` under ``name``. Paths that
    are not present in the archive are reported on stdout and skipped.

    Args:
        zip: An open, readable archive.
        dest: Directory passed to ``ZipFile.extract`` as the target path.
    """
    import copy

    for path, name in parse(zip).items():
        try:
            info = zip.getinfo(path)
        except KeyError:
            print(f"error: extract: file {path} not found, skipping")
            continue

        # Rename a copy: `info` belongs to the caller's archive object, and
        # changing it in place would alter what the archive reports afterwards.
        renamed = copy.copy(info)
        renamed.filename = str(name)

        print(f"extracting {path} -> {name}")
        zip.extract(renamed, dest)
def main() -> int:
    """Command-line entry point.

    Reads the zip path and destination directory from ``sys.argv[1]`` and
    ``sys.argv[2]``, then extracts the archive with ``extract``.

    Returns:
        0 on success; 1 when too few arguments are given or when opening or
        extracting the archive raises.
    """
    print(__doc__)

    if len(sys.argv) < 3:
        print(f"usage: {sys.argv[0]} <path to zip file> <dest>")
        return 1

    path = pathlib.Path(sys.argv[1]).resolve()
    dest = pathlib.Path(sys.argv[2]).resolve()

    # Top-level boundary: opening the archive is inside the try as well, so a
    # missing or corrupt zip yields the same one-line error and exit code as
    # a failure during extraction instead of an uncaught traceback.
    try:
        with zipfile.ZipFile(path) as z:
            extract(z, dest)
    except Exception as e:
        print(f"error: {type(e).__name__}: {e}")
        return 1

    return 0
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment