Download course roster from Canvas LMS
#!/usr/bin/python
'''
To obtain an access token in the Canvas LMS (e.g. bcourses, uconline),
click your user name at the top, scroll down to Approved Integrations,
and click New Access Token.

# Tests with curl
curl https://cole2.uconline.edu/api/v1/courses/NNNNNN/users -X GET \
-F "per_page=5000" -F "include[]=email" -F "enrollment_type=student" \
-H "Authorization: Bearer <token>"
curl https://bcourses.berkeley.edu/api/v1/courses/NNNNNNN/users -X GET \
-F "per_page=5000" -F "include[]=email" -F "enrollment_type=student" \
-H "Authorization: Bearer <token>"

example user = {
    u'sortable_name': u'Smith, John',
    u'name': u'John Smith',
    u'short_name': u'John Smith',
    u'login_id': u'somestudent@berkeley.edu',
    u'sis_user_id': u'berkeley-23456789',
    u'id': 654321,
    u'sis_login_id': u'somestudent@berkeley.edu',
},

bcourses user = {
    'sortable_name': 'SMITH, JOHN',
    'name': 'JOHN SMITH',
    'short_name': 'JOHN SMITH',
    'integration_id': None,
    'login_id': '1234321',
    'sis_user_id': '23455432',
    'id': 5432123,
    'sis_login_id': '1234321',
    'email': 'johnsmith@berkeley.edu',
}
'''
import sys
import os
import time
import optparse
import json
import gzip
import cPickle
import requests
def parseLinkHeader(lh):
    '''Parse the Link HTTP header to see how the server has paginated users.
    It is of the form: '<URL>; rel="context",<URL>; rel="context",...'
    Return a dictionary mapping each context ('current', 'next', 'last',
    'first', and sometimes 'prev') to its URL.'''
    links = map(lambda x: x.split('; rel='), lh.split(','))
    lc = {}
    for link in links:
        url = link[0].strip()[1:-1] # trim '<' and '>'
        cxt = link[1][1:-1]         # trim '"'
        lc[cxt] = url
    return lc
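# A worked example with a hypothetical (trimmed) header, for illustration:
#   lh = ('<https://bcourses.berkeley.edu/api/v1/courses/1/users?page=2>; rel="current",'
#         '<https://bcourses.berkeley.edu/api/v1/courses/1/users?page=3>; rel="next"')
#   parseLinkHeader(lh) == {
#       'current': 'https://bcourses.berkeley.edu/api/v1/courses/1/users?page=2',
#       'next': 'https://bcourses.berkeley.edu/api/v1/courses/1/users?page=3',
#   }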
def getUsers(base_uri, http_method, params, headers):
    '''Retrieve students from the Canvas LMS by recursively collecting
    paginated output.'''
    if options.debug: print 'getUsers:', base_uri
    r = requests.get(base_uri, params=params, headers=headers)
    # Load our data
    data = r.json()
    if options.debug: print len(data)
    try:
        links = parseLinkHeader(r.headers['link'])
    except KeyError:
        # No Link header; the response was not paginated.
        return data
    # We are not at the last page, so fetch the next one and append.
    if links['current'] != links['last']:
        data += getUsers(links['next'], http_method, params, headers)
    return data
def sid(s):
    '''Strip an institutional prefix (e.g. 'berkeley-') from an SIS user id.'''
    if '-' in s: s = s.split('-')[1]
    return s
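# For example, with the sis_user_id values shown in the docstring above:
#   sid(u'berkeley-23456789') == u'23456789'  # prefixed uconline-style id
#   sid('23455432') == '23455432'             # bare bcourses id passes through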
# MAIN
CACHE_FILE = '/var/cache/apache2/canvas-roster.pkl'
CACHE_EXPIRE = 300
# python's gzip module cannot decompress a partial stream without the checksum
# at the end. The server doesn't seem to be providing that so we trick the
# module into skipping checksum verification.
# http://stackoverflow.com/questions/1732709/unzipping-part-of-a-gz-file-using-python
# If we don't, httplib2's client will return empty content after reporting:
# "Content purported to be compressed with gzip but failed to decompress."
# I tried to ask cole2.uconline.edu for deflate or identity encodings but
# it won't do either.
gzip.GzipFile._read_eof = lambda *args, **kwargs: None
# Parse command-line options (for development, when not cgi)
parser = optparse.OptionParser()
parser.add_option("-d", "--debug", action='store_true', default=False,
                  help="Debug mode; [default=%default]")
(options, args) = parser.parse_args()
# HTTP
http_method = 'GET'
authorization_method = 'Bearer'
# bCourses
courses = {
    'somekey': {
        'server': 'bcourses.berkeley.edu',
        'id': 1357975,
        'token': '19~U9asdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfas',
    },
    'someotherkey': {
        'server': 'cole2.uconline.edu',
        'id': 468642,
        'token': '3~kZqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqw',
    }
}
COURSE = 'somekey'
base_uri = 'https://%s/api/v1/courses/%s/users' % \
    (courses[COURSE]['server'], courses[COURSE]['id'])
TOKEN = courses[COURSE]['token']
headers = {
    "Authorization": "%s %s" % (authorization_method, TOKEN),
}
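# The assembled header mirrors the curl tests in the docstring, e.g.
#   headers == {'Authorization': 'Bearer 19~U9asdf...'}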
# Filter by students
params = {
    "per_page": 500,
    "include[]": "email",
    "enrollment_type": "student"
}
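# Note: requests percent-encodes the bracketed key when building the query
# string, yielding roughly ?per_page=500&include%5B%5D=email&enrollment_type=student,
# which Canvas decodes back to include[]=email on its end.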
cache_exists = os.path.exists(CACHE_FILE)
# How old is the cache?
if cache_exists: stat = os.stat(CACHE_FILE)
users = []
# If we don't have a cache, or if it is older than CACHE_EXPIRE seconds
# (5 minutes), fetch a fresh roster and cache it.
if not cache_exists or \
   stat.st_mtime + CACHE_EXPIRE < time.mktime(time.localtime()):
    users = getUsers(base_uri, http_method, params, headers)
    f = open(CACHE_FILE, 'wb')
    cPickle.dump(users, f)
    f.close()
# Fall back to the cached roster if the fetch returned nothing, or if it
# was skipped because the cache is still fresh.
if len(users) == 0 and os.path.exists(CACHE_FILE):
    f = open(CACHE_FILE, 'rb')
    users = cPickle.load(f)
    f.close()
# Pretty print
print json.dumps(users, separators=(',', ': '), indent=4)
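# Example invocation during development, assuming the script is saved as
# canvas-roster.py and made executable:
#   ./canvas-roster.py -d > roster.json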