JonCooperWorks/sas.py

## sas.py
'''
sas.py
Logs in to a student's SAS account and gets their schedule, then parses the relevant information using regex, because
SAS's HTML is too ugly to use a parser.
Todo:
(!) Refactor times returned by the parser to datetime objects to allow for easy time-related operations.
    Or not, because it's just a prototype, and that's a lot of effort for a proof-of-concept
'''

import urllib2
import urllib
import cookielib
import re
from datetime import datetime

class SASBrowser(object):
    '''
    A SAS client, currently only able to log in and retrieve a student's timetable.
    In theory, it should work with any banner system implementation, not just
    UWI SAS.

    Cookies are kept in a per-instance cookie jar, so the login request and the
    timetable request that follows it must go through the same instance.
    '''

    # Number of day columns in the timetable grid (day 0 is Monday, day 6 is Sunday).
    _DAYS_PER_WEEK = 7

    # Content of a timetable cell that holds no class.
    _EMPTY_CELL = '&nbsp;'

    # Extracts the content of every timetable cell from the page.
    # NOTE(review): the greedy ".*" after "dd" runs up to the last '">' on the line,
    # which appears to be what strips the opening <A ...> tag from course cells, so
    # do not make it non-greedy without re-checking the course regex below.
    _CELL_PARSER = re.compile(r'<TD.*CLASS="dd.*">(.*)</TD>')

    # Extracts the course name, start and end times and location from a cell.
    _COURSE_PARSER = re.compile(r'^(....\s\d\d\d\d-.\d\d)<BR>\d\d\d\d\d\sClass<BR>(\d?\d:\d\d\s.m)-(\d?\d:\d\d\s.m)<BR>(.*)</A>$')

    def __init__(
        self,
        login='http://sas.uwimona.edu.jm:9010/pls/data_mona/twbkwbis.P_ValLogin',
        timetable='http://sas.uwimona.edu.jm:9010/pls/data_mona/bwskfshd.P_CrseSchd'
        ):
        '''
        Initialize with the login url and the timetable url, and build an opener
        that stores cookies between requests.
        '''
        self._login_url = login
        self._timetable_url = timetable
        self._opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookielib.CookieJar()))

    def _read(self, request):
        '''Open a request with the cookie-aware opener and return the response body.'''
        response = self._opener.open(request)
        try:
            return response.read()
        finally:
            # Release the connection even if reading the body fails.
            response.close()

    def _get(self, url):
        '''Send GET request to a URL and return the response body'''
        return self._read(urllib2.Request(url))

    def _post(self, url, values):
        '''
        Send POST request with the url-encoded values as the body and return the
        response body. Cookies set by the response are stored by the opener.
        '''
        data = urllib.urlencode(values)
        headers = {
            'User-Agent' : 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-GB; rv:1.8.1.12) Gecko/20080201 Firefox/2.0.0.12',
            'Cookie' : 'TESTID=set; accessibility=false'
        }
        return self._read(urllib2.Request(url, data, headers))

    def _get_timetable(self):
        '''Fetch the raw HTML of the timetable page'''
        return self._get(self._timetable_url)

    def _login(self, username, password):
        '''
        Logs a user in using their ID/Password combo.
        Returns False if the response contains the "Invalid login information"
        message, otherwise True.
        '''
        credentials = {'sid' : username, 'PIN' : password}
        response = self._post(self._login_url, credentials)
        return 'Invalid login information' not in response

    def _parse_courses(self, html):
        '''
        Parse timetable HTML and yield one dict per timetable cell, in page order.
        Each dict has a 'day' key (cell index modulo 7) and a 'slot' key, which is
        None for an empty cell or a dict with 'course', 'start_time', 'end_time'
        and 'location' for a class.
        Raises ValueError for a non-empty cell that is not in the expected format.
        '''
        for index, cell in enumerate(self._CELL_PARSER.findall(html)):
            day = index % self._DAYS_PER_WEEK
            if cell == self._EMPTY_CELL:
                yield {'day' : day, 'slot' : None}
                continue
            course = self._COURSE_PARSER.search(cell)
            if course is None:
                # Fail with the offending cell rather than an AttributeError on None.
                raise ValueError('Unrecognised timetable cell: %r' % (cell,))
            yield {
                'day' : day,
                'slot' : {
                    'course' : course.group(1),
                    'start_time' : course.group(2),
                    'end_time' : course.group(3),
                    'location' : course.group(4),
                    },
                }

    def _get_courses(self):
        '''
        Get all courses a student does along with the time and day.
        This is a generator: the timetable page is only fetched when iteration starts.
        '''
        for entry in self._parse_courses(self._get_timetable()):
            yield entry

    def timetable(self, username, password):
        '''
        Log in and fetch the student's timetable.
        If the login is successful, returns a generator of dicts, one per timetable
        cell, each containing the day and a slot. The slot is None for an empty cell,
        otherwise a dict with the course name, start time, end time and location.
        If the login fails, returns None.
        The timetable page is fetched lazily, when the generator is first iterated.
        Note that days are 0 indexed, beginning with Monday, and ending with Sunday
        '''
        if not self._login(username, password):
            return None
        return self._get_courses()
	'''
	sas.py
	Logs in to a student's SAS account and gets their schedule, then parses the relevant information using regex, because
	SAS's HTML is too ugly to use a parser.
	Todo:
	(!) Refactor times returned by the parser to datetime objects to allow for easy time-related operations.
	Or not, because it's just a prototype, and that's a lot of effort for a proof-of-concept
	'''

	import urllib2
	import urllib
	import cookielib
	import re
	from datetime import datetime

	class SASBrowser(object):
		'''
		A SAS client, currently only able to log in and retrieve a student's timetable.
		In theory, it should work with any banner system implementation, not just
		UWI SAS.

		Cookies are kept in a per-instance cookie jar, so the login request and the
		timetable request that follows it must go through the same instance.
		'''

		# Number of day columns in the timetable grid (day 0 is Monday, day 6 is Sunday).
		_DAYS_PER_WEEK = 7

		# Content of a timetable cell that holds no class.
		_EMPTY_CELL = '&nbsp;'

		# Extracts the content of every timetable cell from the page.
		# NOTE(review): the greedy ".*" after "dd" runs up to the last '">' on the line,
		# which appears to be what strips the opening <A ...> tag from course cells, so
		# do not make it non-greedy without re-checking the course regex below.
		_CELL_PARSER = re.compile(r'<TD.*CLASS="dd.*">(.*)</TD>')

		# Extracts the course name, start and end times and location from a cell.
		_COURSE_PARSER = re.compile(r'^(....\s\d\d\d\d-.\d\d)<BR>\d\d\d\d\d\sClass<BR>(\d?\d:\d\d\s.m)-(\d?\d:\d\d\s.m)<BR>(.*)</A>$')

		def __init__(
			self,
			login='http://sas.uwimona.edu.jm:9010/pls/data_mona/twbkwbis.P_ValLogin',
			timetable='http://sas.uwimona.edu.jm:9010/pls/data_mona/bwskfshd.P_CrseSchd'
			):
			'''
			Initialize with the login url and the timetable url, and build an opener
			that stores cookies between requests.
			'''
			self._login_url = login
			self._timetable_url = timetable
			self._opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookielib.CookieJar()))

		def _read(self, request):
			'''Open a request with the cookie-aware opener and return the response body.'''
			response = self._opener.open(request)
			try:
				return response.read()
			finally:
				# Release the connection even if reading the body fails.
				response.close()

		def _get(self, url):
			'''Send GET request to a URL and return the response body'''
			return self._read(urllib2.Request(url))

		def _post(self, url, values):
			'''
			Send POST request with the url-encoded values as the body and return the
			response body. Cookies set by the response are stored by the opener.
			'''
			data = urllib.urlencode(values)
			headers = {
				'User-Agent' : 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-GB; rv:1.8.1.12) Gecko/20080201 Firefox/2.0.0.12',
				'Cookie' : 'TESTID=set; accessibility=false'
			}
			return self._read(urllib2.Request(url, data, headers))

		def _get_timetable(self):
			'''Fetch the raw HTML of the timetable page'''
			return self._get(self._timetable_url)

		def _login(self, username, password):
			'''
			Logs a user in using their ID/Password combo.
			Returns False if the response contains the "Invalid login information"
			message, otherwise True.
			'''
			credentials = {'sid' : username, 'PIN' : password}
			response = self._post(self._login_url, credentials)
			return 'Invalid login information' not in response

		def _parse_courses(self, html):
			'''
			Parse timetable HTML and yield one dict per timetable cell, in page order.
			Each dict has a 'day' key (cell index modulo 7) and a 'slot' key, which is
			None for an empty cell or a dict with 'course', 'start_time', 'end_time'
			and 'location' for a class.
			Raises ValueError for a non-empty cell that is not in the expected format.
			'''
			for index, cell in enumerate(self._CELL_PARSER.findall(html)):
				day = index % self._DAYS_PER_WEEK
				if cell == self._EMPTY_CELL:
					yield {'day' : day, 'slot' : None}
					continue
				course = self._COURSE_PARSER.search(cell)
				if course is None:
					# Fail with the offending cell rather than an AttributeError on None.
					raise ValueError('Unrecognised timetable cell: %r' % (cell,))
				yield {
					'day' : day,
					'slot' : {
						'course' : course.group(1),
						'start_time' : course.group(2),
						'end_time' : course.group(3),
						'location' : course.group(4),
						},
					}

		def _get_courses(self):
			'''
			Get all courses a student does along with the time and day.
			This is a generator: the timetable page is only fetched when iteration starts.
			'''
			for entry in self._parse_courses(self._get_timetable()):
				yield entry

		def timetable(self, username, password):
			'''
			Log in and fetch the student's timetable.
			If the login is successful, returns a generator of dicts, one per timetable
			cell, each containing the day and a slot. The slot is None for an empty cell,
			otherwise a dict with the course name, start time, end time and location.
			If the login fails, returns None.
			The timetable page is fetched lazily, when the generator is first iterated.
			Note that days are 0 indexed, beginning with Monday, and ending with Sunday
			'''
			if not self._login(username, password):
				return None
			return self._get_courses()