Created
September 17, 2012 02:54
-
-
Save JonCooperWorks/3735312 to your computer and use it in GitHub Desktop.
A SAS wrapper to allow for timetable access
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
sas.py
Logs in to a student's SAS account, fetches their schedule, and extracts the relevant information with regular
expressions, because SAS's HTML is too ugly to use a parser.
Todo:
(!) Refactor the times returned by the parser into datetime objects to allow for easy time-related operations.
Or not, because it's just a prototype, and that's a lot of effort for a proof-of-concept.
'''
import urllib2 | |
import urllib | |
import cookielib | |
import re | |
from datetime import datetime | |
class SASBrowser(object):
    '''
    A SAS client, currently only able to log in and retrieve a student's timetable.
    In theory, it should work with any Banner system implementation, not just
    UWI SAS (only the two URLs are configurable; other installations are untested).

    Every request goes through one cookie-aware opener, so cookies set by the
    login response are sent again when the timetable page is requested.
    '''

    # Captures the contents of each table cell whose CLASS attribute starts with "dd".
    # '.' does not match newlines, so a match never spans lines. The '.*' before '">'
    # is greedy: it runs to the LAST '">' on the line, which skips an opening
    # '<A HREF="...">' tag inside the cell. The course pattern below depends on that,
    # since it is anchored at the course code and ends at '</A>'.
    _CELL_PARSER = re.compile(r'<TD.*CLASS="dd.*">(.*)</TD>')

    # Extracts the course name, start and end times and location from a cell.
    _COURSE_PARSER = re.compile(
        r'^(....\s\d\d\d\d-.\d\d)<BR>\d\d\d\d\d\sClass<BR>(\d?\d:\d\d\s.m)-(\d?\d:\d\d\s.m)<BR>(.*)</A>$'
    )

    def __init__(
        self,
        login='http://sas.uwimona.edu.jm:9010/pls/data_mona/twbkwbis.P_ValLogin',
        timetable='http://sas.uwimona.edu.jm:9010/pls/data_mona/bwskfshd.P_CrseSchd'
    ):
        '''
        Initialize with the login url and the timetable url.

        No request is made here; the opener is only constructed.
        '''
        self._login_url = login
        self._timetable_url = timetable
        self._opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookielib.CookieJar()))

    def _get(self, url):
        '''Send a GET request to a URL and return the response body.'''
        response = self._opener.open(urllib2.Request(url))
        try:
            return response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()

    def _post(self, url, values):
        '''
        Send a POST request with ``values`` url-encoded as the body and return
        the response body. Cookies are handled by the shared opener.
        '''
        data = urllib.urlencode(values)
        headers = {
            'User-Agent' : 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-GB; rv:1.8.1.12) Gecko/20080201 Firefox/2.0.0.12',
            'Cookie' : 'TESTID=set; accessibility=false'
        }
        response = self._opener.open(urllib2.Request(url, data, headers))
        try:
            return response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()

    def _get_timetable(self):
        '''Return the raw HTML of the timetable page.'''
        return self._get(self._timetable_url)

    def _login(self, username, password):
        '''Logs a user in using their ID/Password combo. If successful, return True, else False'''
        credentials = {'sid' : username, 'PIN' : password}
        response = self._post(self._login_url, credentials)
        # Success is inferred purely from the absence of the error text in the page.
        return 'Invalid login information' not in response

    def _parse_courses(self, html):
        '''
        Yield one dict per timetable cell found in ``html``.

        Each dict has a 'day' key (the cell's position modulo 7) and a 'slot'
        key. 'slot' is a dict with 'course', 'start_time', 'end_time' and
        'location' when the cell holds a parsable class entry, and None for
        every other cell (empty placeholders as well as content the course
        pattern does not recognise), so one odd cell cannot abort the
        whole timetable.
        '''
        for index, cell in enumerate(self._CELL_PARSER.findall(html)):
            course = self._COURSE_PARSER.search(cell)
            if course is None:
                slot = None
            else:
                slot = {
                    'course' : course.group(1),
                    'start_time' : course.group(2),
                    'end_time' : course.group(3),
                    'location' : course.group(4),
                }
            yield {
                'day' : index % 7,
                'slot' : slot,
            }

    def _get_courses(self):
        '''
        Get all courses a student does along with the time and day.

        This is a generator: the timetable page is only requested once
        iteration starts.
        '''
        for entry in self._parse_courses(self._get_timetable()):
            yield entry

    def timetable(self, username, password):
        '''
        Log in and return the student's timetable.

        If the login is successful, returns a generator of dicts, one per
        timetable cell, each with a 'day' and a 'slot'. 'slot' is None for a
        cell without a recognised class, otherwise a dict containing the course
        name, start time, end time and location (times are the strings found in
        the page). The timetable page itself is fetched lazily, when the
        generator is first iterated. If the login fails, returns None.

        Note that days are 0 indexed, beginning with Monday, and ending with Sunday
        (this assumes the page lays cells out in seven day columns starting on Monday).
        '''
        if not self._login(username, password):
            return None
        return self._get_courses()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment