Skip to content

Instantly share code, notes, and snippets.

@arvidfm
Last active March 23, 2016 20:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arvidfm/b9a970c6ce9dd52311f8 to your computer and use it in GitHub Desktop.
Save arvidfm/b9a970c6ce9dd52311f8 to your computer and use it in GitHub Desktop.
Fetch course lists from kth.se
import argparse
import asyncio
import re
import sys
import aiohttp
import lxml.html
async def get_periods(session, course):
    """Return the period descriptions listed on a course's kth.se page.

    Requests the English course page for ``course`` through ``session``
    (used via ``async with session.get(...)``), parses the response body as
    HTML and, for every block under ``courseRoundBlocks`` whose heading is
    not marked ``CANCELLED``, collects the text of the first ``infoset``
    list item.

    Returns a list of whitespace-stripped strings, one per text node found;
    the list is empty when the page contains no matching nodes.
    """
    url = "https://www.kth.se/student/kurser/kurs/{}?l=en".format(course)
    async with session.get(url) as response:
        page = lxml.html.fromstring(await response.text())

    # Cancelled course rounds are excluded by the not(...) predicate.
    raw_periods = page.xpath(
        ".//div[@id='courseRoundBlocks']/div[not(h3/a/span[text()='CANCELLED'])]/"
        "div/ul[@class='infoset']/li[1]/p/text()")
    return list(map(str.strip, raw_periods))
# Credit amounts look like "7.5" or "30": digits with an optional fraction.
_CREDITS_RE = re.compile(r"^(\d+)(?:\.(\d+))?$")
# Period labels look like "P1" .. "P4".
_PERIOD_RE = re.compile(r"P\d")


def _parse_credits(text):
    """Parse a credit amount such as ``"7.5"`` or ``"30"``.

    Returns a two-element tuple ``(whole, fraction)`` of ints. A missing
    fractional part becomes 0 so that every parsed value has the same shape
    and can be compared with, and formatted like, any other.

    Raises ValueError if ``text`` is not of the form ``digits[.digits]``.
    """
    match = _CREDITS_RE.match(text)
    if match is None:
        raise ValueError("{!r} is not a valid amount of credits".format(text))
    whole, fraction = match.groups()
    return int(whole), int(fraction) if fraction is not None else 0


async def main():
    """Print the courses of a kth.se course category that match the filters.

    Command-line arguments are read from ``sys.argv``: a course category,
    optional ``--min-credits`` / ``--max-credits`` bounds and an optional
    ``--period``. The category's course list is fetched, each course that
    passes the level and credit filters has its periods looked up with
    ``get_periods``, and the surviving courses are printed sorted by credits.

    Returns 1 if the course list could not be fetched, otherwise None (which
    ``sys.exit`` treats as success). Invalid command-line arguments make
    argparse raise SystemExit.
    """
    def credit_type(string):
        # argparse converter: report malformed input as a usage error.
        try:
            return _parse_credits(string)
        except ValueError:
            raise argparse.ArgumentTypeError(
                "{} is not a valid amount of credits".format(string)) from None

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('category', help="course category (e.g. DD, DT, SF)")
    parser.add_argument('--max-credits', default="30.0", type=credit_type,
                        help="do not include courses that comprise more "
                             "than the given amount of credits")
    parser.add_argument('--min-credits', default="0.0", type=credit_type,
                        help="do not include courses that comprise less "
                             "than the given amount of credits")
    parser.add_argument('--period', choices=['P1', 'P2', 'P3', 'P4'],
                        help="the period the course is held")
    args = parser.parse_args()

    courses = []
    # ClientSession is an asynchronous context manager; a plain ``with`` is
    # rejected by current aiohttp releases.
    async with aiohttp.ClientSession() as session:
        async with session.get(
                "https://www.kth.se/student/kurser/org/{}?l=en".format(args.category)) as r:
            if r.status != 200:
                print("Error: Couldn't fetch course list. Did you supply a valid category?")
                return 1
            doc = lxml.html.fromstring(await r.text())

        course_list = doc.xpath(".//div[@id='searchResult']/table/tbody/tr")
        print("Fetching {} courses...".format(len(course_list)))
        for tr in course_list:
            # Each row is expected to yield exactly four non-empty text
            # nodes; a row with a different shape raises ValueError here.
            name, raw_credits, code, level = [col.strip()
                                              for col in tr.xpath("./td//text()")
                                              if len(col.strip()) > 0]
            # Always a 2-tuple, even when the page omits the decimal part,
            # so the comparison and the "{}.{}" formatting below are safe.
            course_credits = _parse_credits(raw_credits)
            if level not in ('First cycle', 'Second cycle'):
                continue
            if not args.min_credits <= course_credits <= args.max_credits:
                continue

            periods = await get_periods(session, code)
            if args.period is not None:
                # Only the first "P<digit>" of each period string is compared.
                first_labels = (_PERIOD_RE.search(period) for period in periods)
                if not any(label is not None and label.group() == args.period
                           for label in first_labels):
                    continue

            # De-duplicate, and sort so the output is identical between runs
            # (iteration order of a set of strings is not).
            periods = "; ".join(period for period in sorted(set(periods))
                                if args.period is None or args.period in period)
            if len(periods) == 0:
                periods = "[This course is not scheduled to be offered.]"
            courses.append((course_credits, "{}[{}] ({}.{} hp) {}; {}".format(
                "*" if level == "Second cycle" else " ", code, *course_credits, name, periods)))

    if not courses:
        print("No matching courses found.")
    else:
        courses.sort()
        print("\n".join(s for _, s in courses))
if __name__ == "__main__":
    # Create the event loop explicitly: asyncio.get_event_loop() is deprecated
    # when no loop is running, and the loop it returned was never closed.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        exit_code = loop.run_until_complete(main())
    finally:
        loop.close()
    # main() returns 1 on a fetch error and None otherwise; sys.exit(None)
    # exits with status 0.
    sys.exit(exit_code)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment