Download course roster from Canvas LMS
#!/usr/bin/python
'''
To obtain an access token in the Canvas LMS (e.g. bcourses, uconline),
click your user name at the top, scroll down to Approved Integrations,
and click New Access Token.

# Tests with curl
curl https://cole2.uconline.edu/api/v1/courses/NNNNNN/users -X GET \
-F "per_page=5000" -F "include[]=email" -F "enrollment_type=student" \
-H "Authorization: Bearer <token>"
curl https://bcourses.berkeley.edu/api/v1/courses/NNNNNNN/users -X GET \
-F "per_page=5000" -F "include[]=email" -F "enrollment_type=student" \
-H "Authorization: Bearer <token>"

example user = {
    u'sortable_name': u'Smith, John',
    u'name': u'John Smith',
    u'short_name': u'John Smith',
    u'login_id': u'somestudent@berkeley.edu',
    u'sis_user_id': u'berkeley-23456789',
    u'id': 654321,
    u'sis_login_id': u'somestudent@berkeley.edu',
},

bcourses user = {
    'sortable_name': 'SMITH, JOHN',
    'name': 'JOHN SMITH',
    'short_name': 'JOHN SMITH',
    'integration_id': None,
    'login_id': '1234321',
    'sis_user_id': '23455432',
    'id': 5432123,
    'sis_login_id': '1234321',
    'email': 'johnsmith@berkeley.edu',
}
'''
import sys
import os
import time
import optparse
import json
import gzip
import cPickle
import requests
def parseLinkHeader(lh):
    '''Parse the Link HTTP header to see how the server has paginated users.
    It is of the form: '<URL>; rel="context",<URL>; rel="context",...'
    Return a dictionary mapping each context ('current', 'next', 'last',
    'first', and sometimes 'prev') to its URL.'''
    links = map(lambda x: x.split('; rel='), lh.split(','))
    lc = {}
    for link in links:
        url = link[0].strip()[1:-1] # trim '<' and '>'
        cxt = link[1][1:-1]         # trim '"'
        lc[cxt] = url
    return lc
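# A worked example with a hypothetical (trimmed) header, for illustration:
#   lh = ('<https://bcourses.berkeley.edu/api/v1/courses/1/users?page=2>; rel="current",'
#         '<https://bcourses.berkeley.edu/api/v1/courses/1/users?page=3>; rel="next"')
#   parseLinkHeader(lh) == {
#       'current': 'https://bcourses.berkeley.edu/api/v1/courses/1/users?page=2',
#       'next': 'https://bcourses.berkeley.edu/api/v1/courses/1/users?page=3',
#   }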
def getUsers(base_uri, http_method, params, headers):
    '''Retrieve students from the Canvas LMS by recursively collecting
    paginated output.'''
    if options.debug: print 'getUsers:', base_uri
    r = requests.get(base_uri, params=params, headers=headers)
    # Load our data
    data = r.json()
    if options.debug: print len(data)
    try:
        links = parseLinkHeader(r.headers['link'])
    except KeyError:
        # No Link header; the response was not paginated.
        return data
    # We are not at the last page, so fetch the next one and append.
    if links['current'] != links['last']:
        data += getUsers(links['next'], http_method, params, headers)
    return data
def sid(s):
    '''Strip an institutional prefix (e.g. 'berkeley-') from an SIS user id.'''
    if '-' in s: s = s.split('-')[1]
    return s
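# For example, with the sis_user_id values shown in the docstring above:
#   sid(u'berkeley-23456789') == u'23456789'  # prefixed uconline-style id
#   sid('23455432') == '23455432'             # bare bcourses id passes through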
# MAIN
CACHE_FILE = '/var/cache/apache2/canvas-roster.pkl'
CACHE_EXPIRE = 300
# python's gzip module cannot decompress a partial stream without the checksum
# at the end. The server doesn't seem to be providing that so we trick the
# module into skipping checksum verification.
# http://stackoverflow.com/questions/1732709/unzipping-part-of-a-gz-file-using-python
# If we don't, httplib2's client will return empty content after reporting:
# "Content purported to be compressed with gzip but failed to decompress."
# I tried to ask cole2.uconline.edu for deflate or identity encodings but
# it won't do either.
gzip.GzipFile._read_eof = lambda *args, **kwargs: None
# Parse command-line options (for development, when not cgi)
parser = optparse.OptionParser()
parser.add_option("-d", "--debug", action='store_true', default=False,
                  help="Debug mode; [default=%default]")
(options, args) = parser.parse_args()
# HTTP
http_method = 'GET'
authorization_method = 'Bearer'
# bCourses
courses = {
    'somekey': {
        'server': 'bcourses.berkeley.edu',
        'id': 1357975,
        'token': '19~U9asdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfas',
    },
    'someotherkey': {
        'server': 'cole2.uconline.edu',
        'id': 468642,
        'token': '3~kZqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqw',
    }
}
COURSE = 'somekey'
base_uri = 'https://%s/api/v1/courses/%s/users' % \
    (courses[COURSE]['server'], courses[COURSE]['id'])
TOKEN = courses[COURSE]['token']
headers = {
    "Authorization": "%s %s" % (authorization_method, TOKEN),
}
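# The assembled header mirrors the curl tests in the docstring, e.g.
#   headers == {'Authorization': 'Bearer 19~U9asdf...'}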
# Filter by students
params = {
    "per_page": 500,
    "include[]": "email",
    "enrollment_type": "student"
}
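# Note: requests percent-encodes the bracketed key when building the query
# string, yielding roughly ?per_page=500&include%5B%5D=email&enrollment_type=student,
# which Canvas decodes back to include[]=email on its end.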
cache_exists = os.path.exists(CACHE_FILE)
# How old is the cache?
if cache_exists: stat = os.stat(CACHE_FILE)
users = []
# If we don't have a cache, or if it is older than CACHE_EXPIRE seconds
# (5 minutes), fetch a fresh roster and cache it.
if not cache_exists or \
   stat.st_mtime + CACHE_EXPIRE < time.mktime(time.localtime()):
    users = getUsers(base_uri, http_method, params, headers)
    f = open(CACHE_FILE, 'wb')
    cPickle.dump(users, f)
    f.close()
# Fall back to the cached roster if the fetch returned nothing, or if it
# was skipped because the cache is still fresh.
if len(users) == 0 and os.path.exists(CACHE_FILE):
    f = open(CACHE_FILE, 'rb')
    users = cPickle.load(f)
    f.close()
# Pretty print
print json.dumps(users, separators=(',', ': '), indent=4)
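# Example invocation during development, assuming the script is saved as
# canvas-roster.py and made executable:
#   ./canvas-roster.py -d > roster.json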