Last active
February 23, 2016 21:06
-
-
Save ryanlovett/e9558f6e8c0b81d7dfca to your computer and use it in GitHub Desktop.
Download course roster from Canvas LMI
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
''' | |
To obtain an access token in the Canvas LMS (e.g. bcourses, uconline),
click your user name at the top, scroll down to Approved Integrations, and
click New Access Token.
# Tests with curl | |
curl https://cole2.uconline.edu/api/v1/courses/NNNNNN/users -X GET \ | |
-F "per_page=5000" -F "include[]=email" -F "enrollment_type=student" \ | |
-H "Authorization: Bearer <token>" | |
curl https://bcourses.berkeley.edu/api/v1/courses/NNNNNNN/users -X GET \ | |
-F "per_page=5000" -F "include[]=email" -F "enrollment_type=student" \ | |
-H "Authorization: Bearer <token>" | |
example user = { | |
u'sortable_name': u'Smith, John', | |
u'name': u'John Smith', | |
u'short_name': u'John Smith', | |
u'login_id': u'somestudent@berkeley.edu', | |
u'sis_user_id': u'berkeley-23456789', | |
u'id': 654321, | |
u'sis_login_id': u'somestudent@berkeley.edu', | |
}, | |
bcourses user = { | |
'sortable_name': 'SMITH, JOHN', | |
'name': 'JOHN SMITH', | |
'short_name': 'JOHN SMITH', | |
'integration_id': None, | |
'login_id': '1234321', | |
'sis_user_id': '23455432', | |
'id': 5432123, | |
'sis_login_id': '1234321', | |
'email': 'johnsmith@berkeley.edu',
} | |
''' | |
import sys | |
import os | |
import time | |
import optparse | |
import json | |
import gzip | |
import cPickle | |
import requests | |
def parseLinkHeader(lh):
    '''Parse the Link HTTP header to see how the server has paginated users.

    The header is of the form:

        <URL>; rel="context",<URL>; rel="context",...

    Optional whitespace around the "," and ";" separators is tolerated, the
    rel value may be quoted or bare, and commas inside a <URL> do not split
    the entry.

    Args:
        lh: The raw Link header value. May be empty or None.

    Returns:
        A dictionary mapping each rel value found (e.g. 'current', 'next',
        'last', 'first', 'prev') to its URL. Empty when no links are found.
    '''
    # Imported locally so this function does not depend on the module's
    # import block providing re.
    import re

    if not lh:
        return {}

    # Group 1: the URL between '<' and '>'.
    # Group 2: a quoted rel value; group 3: an unquoted rel value.
    pattern = r'<([^>]*)>\s*;\s*rel\s*=\s*(?:"([^"]*)"|([^\s",;]+))'

    lc = {}
    for url, quoted, bare in re.findall(pattern, lh):
        lc[quoted or bare] = url
    return lc
def getUsers(base_uri, http_method, params, headers):
    '''Retrieve students from Canvas by collecting all pages of paginated
    output.

    Args:
        base_uri: URL of the first page to request.
        http_method: Accepted for interface compatibility; only GET requests
            are issued.
        params: Query parameters sent with the first request only.
        headers: HTTP headers (e.g. Authorization) sent with every request.

    Returns:
        A list with the JSON entries of every page, in page order.

    Raises:
        requests.HTTPError: If any page comes back with an error status.
        ValueError: If a page's JSON body is not a list.
    '''
    data = []
    uri = base_uri
    while uri:
        if options.debug:
            print('getUsers: %s' % uri)
        r = requests.get(uri, params=params, headers=headers)
        # Fail loudly on 4xx/5xx rather than treating an error body as users.
        r.raise_for_status()

        page = r.json()
        if not isinstance(page, list):
            raise ValueError('expected a JSON list of users, got %s'
                             % type(page).__name__)
        if options.debug:
            print(len(page))
        data += page

        # The "next" link already carries the full query string, so sending
        # params again would duplicate every parameter on each page.
        params = None

        # Follow rel="next" until the server stops advertising one; this
        # does not require the 'current' or 'last' links to be present.
        link_header = r.headers.get('link')
        links = parseLinkHeader(link_header) if link_header else {}
        uri = links.get('next')
    return data
def sid(s):
    '''Return the second hyphen-separated field of s, or s unchanged when it
    contains no hyphen (e.g. 'berkeley-23456789' gives '23456789').'''
    if '-' not in s:
        return s
    pieces = s.split('-')
    return pieces[1]
# MAIN | |
CACHE_FILE = '/var/cache/apache2/canvas-roster.pkl' | |
CACHE_EXPIRE = 300 | |
# python's gzip module cannot decompress a partial stream without the checksum | |
# at the end. The server doesn't seem to be providing that so we trick the | |
# module into skipping checksum verification. | |
# http://stackoverflow.com/questions/1732709/unzipping-part-of-a-gz-file-using-python | |
# If we don't, httplib2's client will return empty content after reporting: | |
# "Content purported to be compressed with gzip but failed to decompress." | |
# I tried to ask cole2.uconline.edu for deflate or identity encodings but | |
# it won't do either. | |
gzip.GzipFile._read_eof = lambda *args, **kwargs: None | |
# Parse command-line options (for development, when not cgi) | |
parser = optparse.OptionParser() | |
parser.add_option("-d", "--debug", action='store_true', default=False, | |
help="Debug mode; [default=%default]") | |
(options, args) = parser.parse_args() | |
# HTTP | |
http_method = 'GET' | |
authorization_method = 'Bearer' | |
# bCourses | |
courses = { | |
'somekey':{ | |
'server':'bcourses.berkeley.edu', | |
'id':1357975, | |
'token':'19~U9asdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfas', | |
}, | |
'someotherkey':{ | |
'server':'cole2.uconline.edu', | |
'id':468642, | |
'token':'3~kZqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqw', | |
} | |
} | |
COURSE = 'somekey' | |
base_uri = 'https://%s/api/v1/courses/%s/users' % \ | |
(courses[COURSE]['server'], courses[COURSE]['id']) | |
TOKEN = courses[COURSE]['token'] | |
headers = { | |
"Authorization":"%s %s" % (authorization_method, TOKEN), | |
} | |
# Filter by students | |
params = { | |
"per_page":500, | |
"include[]":"email", | |
"enrollment_type":"student" | |
} | |
# Serve the roster from the on-disk cache while it is fresh; otherwise fetch
# it from the server and refresh the cache.

# Start with an empty roster so the fallback check below is always defined,
# including when the cache is fresh and no fetch happens.
users = []

# How old is the cache? A single getmtime call avoids the gap between an
# exists() check and a later stat().
try:
    cache_mtime = os.path.getmtime(CACHE_FILE)
except OSError:
    cache_mtime = None
cache_exists = cache_mtime is not None

# If we don't have a cache or it is older than CACHE_EXPIRE seconds
if not cache_exists or cache_mtime + CACHE_EXPIRE < time.time():
    users = getUsers(base_uri, http_method, params, headers)
    # Only refresh the cache with a non-empty roster, so an empty fetch
    # cannot overwrite the last good copy that the fallback relies on.
    if users:
        with open(CACHE_FILE, 'wb') as f:
            cPickle.dump(users, f)

# Nothing fetched (fresh cache or empty result): use the cached roster.
if not users and os.path.exists(CACHE_FILE):
    # NOTE(review): unpickling is only safe if CACHE_FILE is written solely
    # by this script -- confirm the file's permissions.
    with open(CACHE_FILE, 'rb') as f:
        users = cPickle.load(f)

# Pretty print
print(json.dumps(users, separators=(',', ': '), indent=4))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment