Created
June 9, 2024 06:21
-
-
Save HandcartCactus/3398f6e852ac5d962f36dcaeec80fdef to your computer and use it in GitHub Desktop.
An almost-pure Python script to download all files from Canvas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging
import os
import platform
import re
import subprocess
import sys
import urllib
import urllib.parse
import urllib.request
from typing import Any, Dict, Literal, Optional

import requests
def sanitize_for_dirname(s:str):
    """Turn an arbitrary string into a lowercase, underscore-separated name.

    The input is lowercased, every character that is not a word character is
    replaced by ``_``, and each run of two or more underscores is then
    collapsed into a single ``_``.
    """
    non_word = re.compile(r'[^\w\d]')
    underscore_run = re.compile(r'([\_]{2,})')
    underscored = non_word.sub('_', s.lower())
    return underscore_run.sub('_', underscored)
class CanvasAPI:
    """Small client for the Canvas REST API that mirrors course files to disk.

    The access token is sent as the ``access_token`` query parameter on every
    API request. Desktop notifications are only attempted on Linux and are
    capped at ``max_notify`` per instance.
    """

    COURSES_ENDPOINT = 'api/v1/courses'
    COURSE_FILES_ENDPOINT = 'api/v1/courses/{course_id}/files'
    # Seconds to wait for an API response; without a timeout a stalled
    # connection would block the process indefinitely.
    REQUEST_TIMEOUT = 30

    _log = logging.getLogger(__name__)

    def __init__(self, scheme:Literal['http', 'https']='https', netloc:str='canvas.instructure.com', access_token:Optional[str]=None, max_notify:int=3):
        """Store connection settings and reset the notification counter.

        Args:
            scheme: URL scheme used for API requests.
            netloc: Host name of the Canvas instance.
            access_token: Canvas API token added to every request's query.
            max_notify: Maximum number of desktop notifications to send.
        """
        self.scheme = scheme
        self.netloc = netloc
        self.access_token = access_token
        self.max_notify = max_notify
        self.current_notify = 0

    def _create_url(self, path:str, query:Dict[str, Any]):
        """Build an absolute URL from ``path`` and the url-encoded ``query``."""
        return urllib.parse.urlunsplit((
            self.scheme,
            self.netloc,
            path,
            urllib.parse.urlencode(query),
            '',  # no fragment
        ))

    def courses_raw(self):
        """Request the first page (up to 100 entries) of the course list."""
        url = self._create_url(path=self.COURSES_ENDPOINT, query={'access_token':self.access_token,'page':1,'per_page':100})
        return requests.get(url=url, timeout=self.REQUEST_TIMEOUT)

    def courses(self,):
        """Return ``id``/``name``/``course_code`` for every available course.

        Entries whose ``workflow_state`` is missing or not ``'available'`` are
        skipped. A payload that is not a list (e.g. an error object) yields an
        empty list and a logged warning instead of a crash.
        """
        payload = self.courses_raw().json()
        if not isinstance(payload, list):
            self._log.warning("Unexpected course list response: %r", payload)
            return []
        wanted = ('id', 'name', 'course_code')
        return [
            {k: v for k, v in c.items() if k in wanted}
            for c in payload
            # .get(): an entry without 'workflow_state' is skipped, not fatal.
            if c.get('workflow_state') == 'available'
        ]

    def file_list_raw(self, course_id:int, page:int=1):
        """Request one page (20 entries per page) of a course's file list."""
        url = self._create_url(path=self.COURSE_FILES_ENDPOINT.format(course_id=course_id), query={'access_token':self.access_token,'page':page,'per_page':20})
        return requests.get(url=url, timeout=self.REQUEST_TIMEOUT)

    def notify(self, message:str):
        """Show ``message`` with ``notify-send`` on Linux, at most ``max_notify`` times.

        Does nothing on other platforms or once the cap is reached. A missing
        or unlaunchable ``notify-send`` is logged at debug level and ignored.
        """
        if platform.system() != 'Linux':
            return
        # Strict comparison so exactly max_notify notifications are sent.
        if self.current_notify >= self.max_notify:
            return
        try:
            subprocess.run(["notify-send", message])
        except OSError as exc:
            self._log.debug("notify-send could not be run: %s", exc)
            return
        self.current_notify += 1

    def _download_one(self, file:Dict[str, Any], course_dir:str) -> Optional[str]:
        """Download a single file entry into ``course_dir`` if it is not there yet.

        Returns the destination path when a new file was written, otherwise
        ``None`` (already present, unusable entry, or failed download).
        """
        # Keep only the final path component so a name containing separators
        # cannot place the file outside of course_dir.
        display_name = os.path.basename(str(file.get('display_name') or ''))
        url = file.get('url')
        if display_name in ('', '.', '..') or not url:
            return None
        target = os.path.join(course_dir, display_name)
        if os.path.isfile(target):
            return None
        # Download under a temporary name and rename on success, so an
        # interrupted transfer is never mistaken for a finished file later.
        partial = target + '.part'
        try:
            urllib.request.urlretrieve(url=url, filename=partial)
            os.replace(partial, target)
        except (OSError, ValueError) as exc:
            # Best effort: report the failure and carry on with other files.
            self._log.warning("Could not download %r: %s", display_name, exc)
            try:
                os.remove(partial)
            except OSError:
                pass  # nothing was written, or it cannot be cleaned up
            return None
        return target

    def download_files(self, root_dir:str):
        """Download every not-yet-present file of every available course.

        Each course gets a directory under ``root_dir`` named after the
        sanitized course name; missing directories are created. File lists are
        read page by page until a page comes back empty or is not a list.

        Args:
            root_dir: Directory under which the per-course directories live.

        Returns:
            List of paths of the files downloaded by this call.
        """
        newly_downloaded = []
        for course in self.courses():
            course_name = sanitize_for_dirname(course['name'])
            course_dir = os.path.join(root_dir, course_name)
            os.makedirs(course_dir, exist_ok=True)
            page = 1
            while True:
                listing = self.file_list_raw(course_id=course['id'], page=page).json()
                # Stop on an empty page, or on a non-list payload (which would
                # otherwise never terminate the loop cleanly).
                if not listing or not isinstance(listing, list):
                    break
                for file in listing:
                    saved_path = self._download_one(file, course_dir)
                    if saved_path is None:
                        continue
                    self.notify(f"CanvasDaemon: Downloaded '{file['display_name']}' in {course_name}.")
                    newly_downloaded.append(saved_path)
                page += 1
        return newly_downloaded
if __name__ == '__main__':
    # Each setting can be overridden through an environment variable (for
    # example in the crontab entry), so the access token does not have to be
    # written into this file. The defaults are the original hard-coded values.
    CANVAS_SYNC_DIR = os.environ.get('CANVAS_SYNC_DIR', '/home/elias/Documents/CanvasSync')
    api = CanvasAPI(
        netloc=os.environ.get('CANVAS_NETLOC', 'canvas.its.virginia.edu'),
        access_token=os.environ.get('CANVAS_ACCESS_TOKEN', 'YOUR ACCESS TOKEN HERE'),
    )
    api.download_files(CANVAS_SYNC_DIR)
Create a virtual environment with dependencies
python3 -m venv /path/to/venv
source /path/to/venv/bin/activate
/path/to/venv/bin/python -m pip install requests
Edit the Linux crontab file
crontab -e
Add this to make the daemon run every two minutes
*/2 * * * * /path/to/venv/bin/python /path/to/canvas_daemon.py
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
https://community.canvaslms.com/t5/Student-Guide/How-do-I-manage-API-access-tokens-as-a-student/ta-p/273