Skip to content

Instantly share code, notes, and snippets.

@HandcartCactus
Created June 9, 2024 06:21
Show Gist options
  • Save HandcartCactus/3398f6e852ac5d962f36dcaeec80fdef to your computer and use it in GitHub Desktop.
Save HandcartCactus/3398f6e852ac5d962f36dcaeec80fdef to your computer and use it in GitHub Desktop.
An almost-pure Python script that downloads all course files from Canvas
from typing import Any, Dict, Literal, Optional
import urllib.parse
import urllib.request
import requests
import urllib
import re
import os
import platform
import subprocess
import sys
def sanitize_for_dirname(s:str):
    """Return a lowercase, directory-friendly version of ``s``.

    The input is lowercased, then every maximal run of characters that are
    either non-word characters or underscores is replaced by a single ``_``.
    Leading and trailing runs are collapsed too, not stripped, so the result
    may start or end with an underscore.
    """
    lowered = s.lower()
    # One pass does both jobs: map non-word characters to "_" and squeeze
    # consecutive underscores (original or substituted) down to one.
    return re.sub(r'[\W_]+', '_', lowered)
class CanvasAPI:
    """Small client for the Canvas REST endpoints needed to mirror course files.

    The access token is sent as the ``access_token`` query parameter on every
    API request. ``max_notify`` caps how many desktop notifications a single
    instance will emit (see :meth:`notify`).
    """

    COURSES_ENDPOINT = 'api/v1/courses'
    COURSE_FILES_ENDPOINT = 'api/v1/courses/{course_id}/files'
    # Seconds to wait on any single API request; without a timeout a stalled
    # connection would block the process forever.
    REQUEST_TIMEOUT = 30

    def __init__(self, scheme:Literal['http', 'https']='https', netloc:str='canvas.instructure.com', access_token:Optional[str]=None, max_notify:int=3):
        """Store connection settings.

        Args:
            scheme: URL scheme used for API requests.
            netloc: Host (and optional port) of the Canvas instance.
            access_token: API token; omitted from requests when ``None``.
            max_notify: Maximum number of notifications this instance sends.
        """
        self.scheme = scheme
        self.netloc = netloc
        self.access_token = access_token
        self.max_notify = max_notify
        self.current_notify = 0

    def _create_url(self, path:str, query:Dict[str, Any]) -> str:
        """Build an absolute URL on this instance from ``path`` and ``query``.

        Query entries whose value is ``None`` are dropped so that an unset
        token is not transmitted as the literal text ``None``.
        """
        params = {key: value for key, value in query.items() if value is not None}
        return urllib.parse.urlunsplit((
            self.scheme,
            self.netloc,
            path,
            urllib.parse.urlencode(params),
            '',
        ))

    def courses_raw(self, page:int=1):
        """Request one page (up to 100 entries) of the course list.

        Returns:
            The ``requests`` response object, unparsed.
        """
        url = self._create_url(
            path=self.COURSES_ENDPOINT,
            query={'access_token': self.access_token, 'page': page, 'per_page': 100},
        )
        return requests.get(url=url, timeout=self.REQUEST_TIMEOUT)

    def courses(self,):
        """Return every course whose ``workflow_state`` is ``'available'``.

        Each returned dict keeps only the ``id``, ``name`` and ``course_code``
        keys that were present in the API payload. Pages are fetched until an
        empty page is returned.

        Raises:
            requests.HTTPError: if the server answers with an error status
                (for example when the access token is rejected).
        """
        wanted = ('id', 'name', 'course_code')
        found = []
        page = 1
        while True:
            response = self.courses_raw(page=page)
            response.raise_for_status()
            batch = response.json()
            if not isinstance(batch, list) or not batch:
                break
            found.extend(
                {key: value for key, value in course.items() if key in wanted}
                for course in batch
                # .get(): tolerate entries that carry no workflow_state key.
                if isinstance(course, dict) and course.get('workflow_state') == 'available'
            )
            page += 1
        return found

    def file_list_raw(self, course_id:int, page:int=1):
        """Request one page (up to 20 entries) of a course's file list.

        Returns:
            The ``requests`` response object, unparsed.
        """
        url = self._create_url(
            path=self.COURSE_FILES_ENDPOINT.format(course_id=course_id),
            query={'access_token': self.access_token, 'page': page, 'per_page': 20},
        )
        return requests.get(url=url, timeout=self.REQUEST_TIMEOUT)

    def notify(self, message:str):
        """Show ``message`` via ``notify-send``, at most ``max_notify`` times.

        Does nothing on platforms other than Linux. A missing or
        unlaunchable ``notify-send`` binary is ignored.
        """
        if platform.system() != 'Linux':
            return
        # Strictly-less-than semantics: exactly ``max_notify`` notifications.
        if self.current_notify >= self.max_notify:
            return
        try:
            subprocess.run(["notify-send", message])
        except OSError:
            # Notifications are a nicety; never let them break a download run.
            return
        self.current_notify += 1

    def _iter_course_files(self, course_id:int):
        """Yield every file entry of a course, following pagination.

        Stops at the first empty page, and also when the body is not a JSON
        list (for example an error object), so such courses are skipped
        instead of crashing or looping forever.
        """
        page = 1
        while True:
            response = self.file_list_raw(course_id=course_id, page=page)
            try:
                listing = response.json()
            except ValueError:
                return
            if not isinstance(listing, list) or not listing:
                return
            yield from listing
            page += 1

    def _download_one(self, url:str, target:str) -> None:
        """Download ``url`` to ``target`` without ever leaving a partial file there.

        The data is written to ``target + '.part'`` and moved into place only
        after the transfer finished; on failure the partial file is removed
        and the exception propagates.
        """
        partial = target + '.part'
        try:
            urllib.request.urlretrieve(url=url, filename=partial)
            os.replace(partial, target)
        except BaseException:
            try:
                os.remove(partial)
            except OSError:
                pass
            raise

    def download_files(self, root_dir:str) -> list:
        """Download every not-yet-present file of every available course.

        Files are stored as ``<root_dir>/<sanitized course name>/<display_name>``;
        missing directories are created. A file that already exists locally is
        skipped. Individual download failures are reported on stderr and do
        not abort the run.

        Args:
            root_dir: Directory under which one sub-directory per course lives.

        Returns:
            Paths of the files downloaded during this call.
        """
        newly_downloaded = []
        for course in self.courses():
            course_name = sanitize_for_dirname(course.get('name') or str(course['id']))
            course_dir = os.path.join(root_dir, course_name)
            os.makedirs(course_dir, exist_ok=True)
            for file in self._iter_course_files(course['id']):
                if not isinstance(file, dict):
                    continue
                # basename(): the name comes from the server, so make sure it
                # cannot point outside the course directory.
                display_name = os.path.basename(str(file.get('display_name') or ''))
                url = file.get('url')
                if not display_name or not url:
                    continue
                proposed_fpath = os.path.join(course_dir, display_name)
                if os.path.isfile(proposed_fpath):
                    continue
                try:
                    self._download_one(url, proposed_fpath)
                except Exception as e:
                    # Best effort: report and move on; the file is retried on
                    # the next run because nothing was left at the target path.
                    print(f"CanvasDaemon: Failed to download '{display_name}' in {course_name}: {e}", file=sys.stderr)
                    continue
                newly_downloaded.append(proposed_fpath)
                self.notify(f"CanvasDaemon: Downloaded '{display_name}' in {course_name}.")
        return newly_downloaded
if __name__ == '__main__':
    # Settings may be supplied through the environment (handy for cron) so the
    # access token does not have to live in this file; the literals below are
    # the fallbacks used when a variable is unset.
    CANVAS_SYNC_DIR = os.environ.get('CANVAS_SYNC_DIR', '/home/elias/Documents/CanvasSync')
    # Create the sync root up front so the very first run does not fail on a
    # missing directory.
    os.makedirs(CANVAS_SYNC_DIR, exist_ok=True)
    api = CanvasAPI(
        netloc=os.environ.get('CANVAS_NETLOC', 'canvas.its.virginia.edu'),
        access_token=os.environ.get('CANVAS_ACCESS_TOKEN', 'YOUR ACCESS TOKEN HERE'),
    )
    api.download_files(CANVAS_SYNC_DIR)
@HandcartCactus
Copy link
Author

Create a virtual environment and install the dependencies

python3 -m venv /path/to/venv
source /path/to/venv/bin/activate
/path/to/venv/bin/python -m pip install requests

Edit your Linux crontab file

crontab -e

Add this line to make the script run every two minutes

*/2 * * * * /path/to/venv/bin/python /path/to/canvas_daemon.py

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment