# arvidfm/kth-course-list.py

## kth-course-list.py

import argparse
import asyncio
import re
import sys

import aiohttp
import lxml.html

async def get_periods(session, course):
    """Return the period descriptions shown on a course's KTH page.

    Requests the English course page for ``course`` through ``session`` and
    collects, for every course round block whose heading is not marked
    ``CANCELLED``, the text of the first ``infoset`` list entry.

    Args:
        session: client session whose ``get(url)`` result works with
            ``async with`` and exposes an awaitable ``text()``.
        course: course code inserted into the page URL.

    Returns:
        A list with one whitespace-stripped string per matched text node.
    """
    url = "https://www.kth.se/student/kurser/kurs/{}?l=en".format(course)
    async with session.get(url) as response:
        page = await response.text()

    # Skip course rounds whose heading carries a CANCELLED label.
    round_info_xpath = (
        ".//div[@id='courseRoundBlocks']/div[not(h3/a/span[text()='CANCELLED'])]/"
        "div/ul[@class='infoset']/li[1]/p/text()"
    )
    text_nodes = lxml.html.fromstring(page).xpath(round_info_xpath)
    return [node.strip() for node in text_nodes]


async def main():
    """Print the courses of a KTH course category that match the given filters.

    Parses the command line (``category``, ``--max-credits``, ``--min-credits``
    and ``--period``), downloads the English course list for the category and,
    for every first- or second-cycle course whose credits lie within the
    requested range, looks up its periods with ``get_periods``.  Matching
    courses are printed one per line, sorted by credits; second-cycle courses
    are prefixed with ``*``.

    Returns:
        1 when the course list request does not answer with status 200,
        otherwise None.
    """
    def credit_type(string):
        """Parse a credit amount such as ``"7.5"`` or ``"7"`` for argparse.

        Returns an ``(integer part, fractional part)`` tuple of ints so that
        amounts compare without floating point.  The fractional digits are
        read as a plain integer, so ``"7.50"`` becomes ``(7, 50)``.

        Raises:
            argparse.ArgumentTypeError: if ``string`` is not digits with an
                optional ``.digits`` suffix.
        """
        match = re.match(r"^(\d+)(?:\.(\d+))?$", string)
        if match is None:
            raise argparse.ArgumentTypeError("{} is not a valid amount of credits".format(string))

        whole, fraction = match.groups()
        # A missing fractional part counts as zero.
        return int(whole), int(fraction or 0)

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('category', help="course category (e.g. DD, DT, SF)")
    parser.add_argument('--max-credits', default="30.0", type=credit_type,
                        help="do not include courses that comprise more "
                             "than the given amount of credits")
    parser.add_argument('--min-credits', default="0.0", type=credit_type,
                        help="do not include courses that comprise less "
                             "than the given amount of credits")
    parser.add_argument('--period', choices=['P1', 'P2', 'P3', 'P4'],
                        help="the period the course is held")
    args = parser.parse_args()

    period_pattern = re.compile(r"P\d")

    # ClientSession has to be entered with ``async with``; current aiohttp
    # releases reject the synchronous ``with`` form.
    async with aiohttp.ClientSession() as session:
        async with session.get(
                "https://www.kth.se/student/kurser/org/{}?l=en".format(args.category)) as r:
            if r.status != 200:
                print("Error: Couldn't fetch course list. Did you supply a valid category?")
                return 1

            doc = lxml.html.fromstring(await r.text())

        courses = []

        course_list = doc.xpath(".//div[@id='searchResult']/table/tbody/tr")
        print("Fetching {} courses...".format(len(course_list)))
        for tr in course_list:
            # Each row must yield exactly four non-empty text nodes; any other
            # count raises ValueError when unpacking.
            name, credits, code, level = [col.strip()
                                          for col in tr.xpath("./td//text()")
                                          if len(col.strip()) > 0]
            # Same tuple-of-ints form as credit_type, so the range check below
            # compares like with like.
            credits = tuple(map(int, credits.split(".")))

            if level not in ('First cycle', 'Second cycle') or not (
                    args.min_credits <= credits <= args.max_credits):
                continue

            periods = await get_periods(session, code)
            if args.period is not None:
                # Look at every period code in a round description, not only
                # the first one, so rounds spanning several periods still match.
                periods = [period for period in periods
                           if args.period in period_pattern.findall(period)]
                if not periods:
                    continue

            # Deduplicate and sort so the output is stable between runs.
            periods = "; ".join(sorted(set(periods)))
            if not periods:
                periods = "[This course is not scheduled to be offered.]"

            # The "{}.{}" part assumes credits has an integer and a fractional
            # component, i.e. the table shows values such as "7.5".
            courses.append((credits, "{}[{}] ({}.{} hp) {}; {}".format(
                "*" if level == "Second cycle" else " ", code, *credits, name, periods)))

        if not courses:
            print("No matching courses found.")
        else:
            # Tuples sort by credits first, then by the formatted line.
            courses.sort()
            print("\n".join(line for _, line in courses))


if __name__ == "__main__":
    # Run only when executed as a script, so importing the module has no
    # side effects.
    loop = asyncio.get_event_loop()
    try:
        exit_code = loop.run_until_complete(main())
    finally:
        # Release the loop's resources even if main() raises.
        loop.close()
    # main() returns 1 on a failed fetch and None otherwise; sys.exit(None)
    # exits with status 0.
    sys.exit(exit_code)

	import argparse
	import asyncio
	import re
	import sys

	import aiohttp
	import lxml.html

	async def get_periods(session, course):
	    """Return the period descriptions shown on a course's KTH page.

	    Requests the English course page for ``course`` through ``session`` and
	    collects, for every course round block whose heading is not marked
	    ``CANCELLED``, the text of the first ``infoset`` list entry.

	    Args:
	        session: client session whose ``get(url)`` result works with
	            ``async with`` and exposes an awaitable ``text()``.
	        course: course code inserted into the page URL.

	    Returns:
	        A list with one whitespace-stripped string per matched text node.
	    """
	    async with session.get("https://www.kth.se/student/kurser/kurs/{}?l=en".format(course)) as r:
	        doc = lxml.html.fromstring(await r.text())

	    # Skip course rounds whose heading carries a CANCELLED label.
	    periods = doc.xpath(".//div[@id='courseRoundBlocks']/div[not(h3/a/span[text()='CANCELLED'])]/"
	                        "div/ul[@class='infoset']/li[1]/p/text()")
	    return [period.strip() for period in periods]


	async def main():
	    """Print the courses of a KTH course category that match the given filters.

	    Parses the command line (``category``, ``--max-credits``, ``--min-credits``
	    and ``--period``), downloads the English course list for the category and,
	    for every first- or second-cycle course whose credits lie within the
	    requested range, looks up its periods with ``get_periods``.  Matching
	    courses are printed one per line, sorted by credits; second-cycle courses
	    are prefixed with ``*``.

	    Returns:
	        1 when the course list request does not answer with status 200,
	        otherwise None.
	    """
	    def credit_type(string):
	        """Parse a credit amount such as ``"7.5"`` or ``"7"`` for argparse.

	        Returns an ``(integer part, fractional part)`` tuple of ints so that
	        amounts compare without floating point.  The fractional digits are
	        read as a plain integer, so ``"7.50"`` becomes ``(7, 50)``.

	        Raises:
	            argparse.ArgumentTypeError: if ``string`` is not digits with an
	                optional ``.digits`` suffix.
	        """
	        match = re.match(r"^(\d+)(?:\.(\d+))?$", string)
	        if match is None:
	            raise argparse.ArgumentTypeError("{} is not a valid amount of credits".format(string))

	        whole, fraction = match.groups()
	        # A missing fractional part counts as zero.
	        return int(whole), int(fraction or 0)

	    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
	    parser.add_argument('category', help="course category (e.g. DD, DT, SF)")
	    parser.add_argument('--max-credits', default="30.0", type=credit_type,
	                        help="do not include courses that comprise more "
	                             "than the given amount of credits")
	    parser.add_argument('--min-credits', default="0.0", type=credit_type,
	                        help="do not include courses that comprise less "
	                             "than the given amount of credits")
	    parser.add_argument('--period', choices=['P1', 'P2', 'P3', 'P4'],
	                        help="the period the course is held")
	    args = parser.parse_args()

	    period_pattern = re.compile(r"P\d")

	    # ClientSession has to be entered with ``async with``; current aiohttp
	    # releases reject the synchronous ``with`` form.
	    async with aiohttp.ClientSession() as session:
	        async with session.get(
	                "https://www.kth.se/student/kurser/org/{}?l=en".format(args.category)) as r:
	            if r.status != 200:
	                print("Error: Couldn't fetch course list. Did you supply a valid category?")
	                return 1

	            doc = lxml.html.fromstring(await r.text())

	        courses = []

	        course_list = doc.xpath(".//div[@id='searchResult']/table/tbody/tr")
	        print("Fetching {} courses...".format(len(course_list)))
	        for tr in course_list:
	            # Each row must yield exactly four non-empty text nodes; any other
	            # count raises ValueError when unpacking.
	            name, credits, code, level = [col.strip()
	                                          for col in tr.xpath("./td//text()")
	                                          if len(col.strip()) > 0]
	            # Same tuple-of-ints form as credit_type, so the range check below
	            # compares like with like.
	            credits = tuple(map(int, credits.split(".")))

	            if level not in ('First cycle', 'Second cycle') or not (
	                    args.min_credits <= credits <= args.max_credits):
	                continue

	            periods = await get_periods(session, code)
	            if args.period is not None:
	                # Look at every period code in a round description, not only
	                # the first one, so rounds spanning several periods still match.
	                periods = [period for period in periods
	                           if args.period in period_pattern.findall(period)]
	                if not periods:
	                    continue

	            # Deduplicate and sort so the output is stable between runs.
	            periods = "; ".join(sorted(set(periods)))
	            if not periods:
	                periods = "[This course is not scheduled to be offered.]"

	            # Six placeholders: marker, code, the two credit components, name
	            # and periods, so the credits tuple has to be unpacked.
	            courses.append((credits, "{}[{}] ({}.{} hp) {}; {}".format(
	                "*" if level == "Second cycle" else " ", code, *credits, name, periods)))

	        if not courses:
	            print("No matching courses found.")
	        else:
	            # Tuples sort by credits first, then by the formatted line.
	            courses.sort()
	            print("\n".join(line for _, line in courses))


	if __name__ == "__main__":
	    # Run only when executed as a script, so importing the module has no
	    # side effects.
	    loop = asyncio.get_event_loop()
	    try:
	        exit_code = loop.run_until_complete(main())
	    finally:
	        # Release the loop's resources even if main() raises.
	        loop.close()
	    # main() returns 1 on a failed fetch and None otherwise; sys.exit(None)
	    # exits with status 0.
	    sys.exit(exit_code)