Z1ni/uta_course_info.py

## uta_course_info.py
#!/usr/bin/python3

import requests
import re
import time
import json
from argparse import ArgumentParser
from bs4 import BeautifulSoup as bs4
from datetime import datetime as dt
from datetime import timedelta
from io import StringIO

def is_period_name(tag):
    return tag.has_attr("id") and tag["id"].lower()[:13] == "opsi_periodi_"

def get_period_info(p):
    i = p.find(is_period_name)
    if i is None:
        # print("Can't find period info tag")
        return (None, None, None)
    m = re.match(r"^opsi_periodi_(\d)", i["id"].lower())
    if m is None:
        print("Can't parse period id from \"%s\"" % i["id"].lower())
        return (None, None, None)
    p_id = int(m.group(1))

    p_str = i.text.strip().replace("\r", "").replace("\t", "").replace("\n", "")
    m = re.findall(r"\((\d{1,2}\.\d{1,2}\.\d{4}).*?(\d{1,2}\.\d{1,2}\.\d{4})\)", p_str)
    if len(m) == 0:
        print("Can't parse start and end date from \"%s\"" % p_str)
        return (None, None, None)
    p_start = dt.strptime(m[0][0], "%d.%m.%Y")
    p_end = dt.strptime(m[0][1], "%d.%m.%Y")

    return (p_id, p_start, p_end)


def parse_points(raw):
    raw = raw.lower()
    if '–' in raw or '-' in raw:
        # E.g. "1-3 op" / "1-3 ects"
        m = re.match(r"^(?P<min>\d+)\W(?P<max>\d+)\s(?P<type>(?:op)|(?:ects))", raw)
        if m is None:
            return False
        p_min = int(m.group("min"))
        p_max = int(m.group("max"))
        p_type = m.group("type")
        return (p_min, p_max, p_type)
    # E.g. "5 op" / "5 ects"
    m = re.match(r"^(?P<amount>\d+)\s(?P<type>(?:op)|(?:ects))", raw)
    if m is None:
        return False
    amount = int(m.group("amount"))
    p_type = m.group("type")
    return (amount, amount, p_type)

def json_serial(obj):
    """JSON serializer for objects not serializable by default json code"""

    if isinstance(obj, dt):
        serial = obj.isoformat()
        return serial
    raise TypeError ("Type not serializable")


def get_course_info(cid):
    base_url = "https://www10.uta.fi/opas/opetusohjelma/marjapuuro.htm"
    params = {
        "id": cid
    }
    head = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:48.0) Gecko/20100101 Firefox/48.0"
    }
    r = requests.get(base_url, params=params, headers=head)
    bs = bs4(r.text, "html.parser")

    # Parse period information
    infobox = bs.find("div", class_="marjapuuro_infobox")
    img_tags = infobox.find("div", class_="opsi_toteuma_periodit").find_all("img")
    c_periods = []
    pid = 1
    for s in img_tags:
        if pid > 4:
            break
        if len(re.findall(("periodi%d\." % pid), s["src"])) > 0:
            c_periods.append(pid)
        pid += 1

    # Parse teaching language
    try:
        c_lang = [t for t in bs.find_all("div", class_="infobox_header") if t.get_text().lower() == "opetuskieli" or t.get_text().lower() == "language of instruction"][0].findNextSibling("div", class_="opsi_toteuma_kentta").get_text(strip=True).lower()
        # Crappy coversion to language codes
        if c_lang == "suomi" or c_lang == "finnish":
            c_lang = "fi"
        elif c_lang == "englanti" or c_lang == "english":
            c_lang = "en"
    except:
        c_lang = None


    # Parse teacher information
    teachers = []
    teachers_data = bs.find_all("div", class_="ope")
    for t in teachers_data:
        t_d = [s for s in t.stripped_strings]
        # print("Parsing teacher data: %s" % ", ".join(t_d))
        t_name_role = t_d[0]
        t_name, t_role = [a.strip() for a in t_name_role.split(",")]
        try:
            t_email = t_d[1].replace("[ät]", "@")
        except IndexError:
            t_email = None
        teachers.append({"name": t_name, "role": t_role, "email": t_email})

    # Try to find course homepage address
    web_addr = None
    try:
        """
        <h2>Opintojakson kotisivu</h2>
        <div class="opsi_toteuma_kentta">
            <a href="http://example.com/">http://example.com/</a>
        </div>
        """
        web_addr = [t for t in bs.find_all("h2") if t.get_text().lower() == "opintojakson kotisivu" or t.get_text().lower() == "homepage url"][0].findNextSibling("div", class_="opsi_toteuma_kentta").find("a")["href"]
    except:
        pass

    # Try to find course start and end date
    c_start = None
    c_end = None
    """
    <h2>Opetus</h2>
    <div class="opsi_toteuma_kentta">
        8.9.2016 - 4.11.2016
    </div>
    """
    t_data = None
    try:
        t_data = [t for t in bs.find_all("h2") if t.get_text().lower() == "opetus" or t.get_text().lower() == "teaching"][0].findNextSibling("div", class_="opsi_toteuma_kentta")
        raw_dates = list(t_data.stripped_strings)[0]
        raw_dates = raw_dates.replace("\r", "").replace("\n", "").replace("\t", "")
        m = re.match(r"^(\d{1,2}\.\d{1,2}\.\d{4}).*?\W(\d{1,2}\.\d{1,2}\.\d{4})", raw_dates)
        if m is None:
            # Try other date format
            m = re.match(r"^(\d{1,2}-.*?-\d{4}).*?\S(\d{1,2}-.*?-\d{4})", raw_dates)
            if m is None:
                # Give up
                print("Date parsing failed, didn't match known formats: \"%s\"" % raw_dates)
            else:
                # TODO: Fails with systems that use other locale than en_US
                #       Use locale.setlocale()
                c_start = dt.strptime(m.group(1), "%d-%b-%Y")
                c_end = dt.strptime(m.group(2), "%d-%b-%Y")
        else:
            c_start = dt.strptime(m.group(1), "%d.%m.%Y")
            c_end = dt.strptime(m.group(2), "%d.%m.%Y")
    except:
        pass

    # Parse teaching info
    hours = -1
    lectures = []

    if t_data is not None:
        for i in t_data.find_all("div", class_="opsi_toteuma_opetustapa"):
            for n in range(0, len(list(i.stripped_strings))):
                possible_name = list(i.stripped_strings)[n].lower()
                if possible_name == "luento-opetus" or possible_name == "lectures":
                    # Try to find total teaching hours
                    # Most likely the next string contains hours
                    possible_hours = list(i.stripped_strings)[n+1].lower()
                    # print("Possible hours: %s" % possible_hours)
                    m = re.match(r"^(\d+)\s(?:(?:tuntia)|(?:hours))$", possible_hours)
                    hours = -1
                    if m is not None:
                        # TODO: try-except ValueError, even though that shouldn't happen because of regex \d
                        hours = int(m.group(1))
                    # print("Hours: %d" % hours)

        # Parse lecture times
        # First one is (always?) teaching dates/times
        try:
            raw_lecture_dates = [list(t.stripped_strings)[0] for t in t_data.find("div", class_="opsi_toteuma_opetustapa").find_all("div", class_="opsi_opetustapa_ajankohta")]

            for p in raw_lecture_dates:
                # print(p)

                # Handle single dates
                m = re.match(r"\w{2}\s(?P<date>\d{1,2}\.\d{1,2}\.\d{4})\sklo\s(?P<startH>\d{1,2})-(?P<endH>\d{1,2}),\s(?P<loc>.*?)$", p)
                if m is None:
                    # Try another format
                    m = re.match(r"\w{3}\s(?P<date>\d{1,2}-.*?-\d{4})\sat\s(?P<startH>\d{1,2})-(?P<endH>\d{1,2}),\s(?P<loc>.*?)$", p)

                if m is not None:
                    raw_date = m.group("date")
                    raw_start = m.group("startH")
                    raw_end = m.group("endH")
                    raw_loc = m.group("loc")

                    try:
                        l_date = dt.strptime(raw_date, "%d.%m.%Y")
                    except ValueError:
                        l_date = dt.strptime(raw_date, "%d-%b-%Y")

                    l_start = l_date.replace(hour=int(raw_start))
                    l_end = l_date.replace(hour=int(raw_end))
                    l_loc = raw_loc
                    # TODO: Parse location data more for information
                    lectures.append({"start": l_start, "end": l_end, "location": l_loc, "exception": False})

                else:
                    # Handle weekly occurences
                    m = re.match(r"\w{2}\s(?P<startDate>\d{1,2}\.\d{1,2}\.\d{4})\s-\s(?P<endDate>\d{1,2}\.\d{1,2}\.\d{4})\sviikoittain\sklo\s(?P<startH>\d{1,2})-(?P<endH>\d{1,2}),\s(?P<loc>.*?)$", p)
                    if m is None:
                        # Try another format
                        m = re.match(r"\w{3}\s(?P<startDate>\d{1,2}-.*?-\d{4})\s-\s(?P<endDate>\d{1,2}-.*?-\d{4})\sweekly\sat\s(?P<startH>\d{1,2})-(?P<endH>\d{1,2}),\s(?P<loc>.*?)$", p)

                    if m is not None:
                        raw_start_date = m.group("startDate")
                        raw_end_date = m.group("endDate")
                        raw_start_h = m.group("startH")
                        raw_end_h = m.group("endH")
                        raw_loc = m.group("loc")

                        try:
                            l_start_date = dt.strptime(raw_start_date, "%d.%m.%Y")
                        except ValueError:
                            l_start_date = dt.strptime(raw_start_date, "%d-%b-%Y")
                        try:
                            l_end_date = dt.strptime(raw_end_date, "%d.%m.%Y")
                        except ValueError:
                            l_end_date = dt.strptime(raw_end_date, "%d-%b-%Y")
                        cur_date = l_start_date

                        while cur_date <= l_end_date:
                            l_start = cur_date.replace(hour=int(raw_start_h))
                            l_end = cur_date.replace(hour=int(raw_end_h))
                            lectures.append({"start": l_start, "end": l_end, "location": raw_loc, "exception": False})
                            cur_date += timedelta(days=7)   # Skip to the next week

        except Exception as e:
            # print(e)
            pass

        # Parse exceptional lecture times
        # NOTE: English date parsing is untested
        try:
            raw_exception_dates = [list(t.stripped_strings)[0] for t in t_data.find("div", class_="opsi_toteuma_opetustapa").find_all("div", class_="opsi_opetustapa_ajankohta_poikkeus")]

            for p in raw_exception_dates:
                p = p.replace("\r", "").replace("\n", "").replace("\t", "")
                # print(p)

                # Handle single dates
                m = re.match(r"(?P<date>\d{1,2}\.\d{1,2}\.\d{4}).*?(?:klo\s(?P<startH>\d{1,2}).*?\W(?P<endH>\d{1,2}).*?)?,\s(?P<loc>.*?)$", p)
                if m is None:
                    # Try another format
                    m = re.match(r"(?P<date>\d{1,2}-.*?-\d{4}).*?(?:at\s(?P<startH>\d{1,2}).*?\W(?P<endH>\d{1,2}),\s(?P<loc>.*?)$", p)

                if m is not None:
                    raw_date = m.group("date")
                    raw_start = m.group("startH")
                    raw_end = m.group("endH")
                    raw_loc = m.group("loc")

                    try:
                        l_date = dt.strptime(raw_date, "%d.%m.%Y")
                    except ValueError:
                        l_date = dt.strptime(raw_date, "%d-%b-%Y")

                    # Get possibly existing entry
                    ex = None
                    ex_id = -1
                    for i, l in enumerate(lectures):
                        if l["start"].strftime("%Y-%m-%d") == l_date.strftime("%Y-%m-%d"):
                            ex = l
                            ex_id = i
                            break

                    if raw_start is not None:
                        l_start = l_date.replace(hour=int(raw_start))
                    else:
                        l_start = l_date
                        if ex is not None:
                            l_start = l_date.replace(hour=ex["start"].hour)
                    if raw_end is not None:
                        l_end = l_date.replace(hour=int(raw_end))
                    else:
                        l_end = l_date
                        if ex is not None:
                            l_end = l_date.replace(hour=ex["end"].hour)
                    l_loc = raw_loc

                    if ex is not None:
                        # Replace existing lecture information
                        lectures[ex_id] = {"start": l_start, "end": l_end, "location": l_loc, "exception": True}
                    else:
                        lectures.append({"start": l_start, "end": l_end, "location": l_loc, "exception": True})

                # TODO: Handle weekly exceptions?

        except Exception as e:
            # print(e)
            pass

        # TODO: Parse exercise times

    return {"teachers": teachers, "homepage": web_addr, "periods": c_periods, "language": c_lang, "start": c_start, "end": c_end, "lectures": lectures, "lecture_hours": hours}


if __name__ == "__main__":

    parser = ArgumentParser(description="UTA course information parser", epilog="If no flags are specified, defaults to all courses text output to standard output.")
    parser.add_argument("-c", "--courses", type=str, dest="courses", help="Course codes (separated with a comma)")
    parser.add_argument("-j", "--json", dest="json", action="store_true", help="Output JSON")
    parser.add_argument("-i", "--ical", dest="ical", action="store_true", help="Output iCalendar")
    parser.add_argument("-d", "--dest", dest="dest", type=str, help="Output to file instead of standard output")

    args = parser.parse_args()

    what_courses = []
    if args.courses is not None:
        what_courses = args.courses.split(",")

    base_url = "https://www10.uta.fi/opas/opetusohjelma/opetusohjelma.htm"
    params = {
        "kieli": "fi",
        "ots": 15,  # 15 CS BSc, 16 CS MSc
        "lvv": 2016,
        "ops": 142
        # To get all information in the same page, use:
        # "display_long": "true"
        # I'm not using that for now, as it makes parsing a bit tedious
    }
    head = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:48.0) Gecko/20100101 Firefox/48.0"
    }

    # r = requests.get(base_url, params=params, headers=head)
    # bs = bs4(r.text, "html.parser")

    # Use cached page for testing
    bs = None
    with open("uta_marjapuuro.html", "r") as f:
        bs = bs4(f.read(), "html.parser")

    known_courses = []

    # Periods
    periods_data = bs.find_all("div", class_="opsi_periodi")
    periods = []

    for p in periods_data:

        # Get period ID
        p_id, start, end = get_period_info(p)
        if p_id is None:
            # print("Period information parsing failed!")
            continue

        # Courses
        courses_data = p.find_all("div", class_="opsi_opintojakso")
        courses = []

        for c in courses_data:
            # Course
            code = c.find("span", class_="opsi_opintojakso_koodi").text.strip()
            if len(code) == 0:
                #  Don't add entries that don't have a code
                continue
            """if code in known_courses:
                # Skip
                continue
            known_courses.append(code)"""

            # Debug
            """if code != "MTTTP1":
                continue"""
            # print(code)
            if len(what_courses) > 0 and code not in what_courses:
                continue

            name_tag = c.find("span", class_="opsi_opintojakso_nimi")
            name = name_tag.text.strip()
            c_id = int(name_tag.find("a")["href"].split("id=")[1])
            points_raw = c.find("span", class_="opsi_opintojakso_laajuus").text.strip()
            points = parse_points(points_raw)
            if not points:
                # Parsing failed
                # print("Information parsing failed for course %s (%s)!" % (code, name))
                points = (-1, -1, "")
            points_struct = {"min": points[0], "max": points[1], "type": points[2]}

            c_info = get_course_info(c_id)

            courses.append({"id": c_id, "code": code, "name": name, "points": points_struct, "info": c_info})

        periods.append({"id": p_id, "start": start, "end": end, "courses": courses})
        break

    # TODO: Timestamp as milliseconds, because javascript (just *1000)
    data = {"timestamp": int(time.time()), "periods": periods}

    if args.json:
        data["timestamp"] = data["timestamp"] * 1000    # JS likes timestamps as milliseconds
        json_io = StringIO()
        json.dump(data, json_io, default=json_serial)
        json_data = json_io.getvalue()

        if args.dest is None:
            print(json_data)
        else:
            try:
                with open(args.dest, "w") as f:
                    f.write(json_data)
            except IOError as e:
                print("JSON file writing failed: %s" % e)

    elif args.ical:
        # TODO
        pass

    else:
        # Stdout
        print("----- UTA course information -----")
        for p in data["periods"]:
            print("Period %d, %s - %s" % (p["id"], p["start"].strftime("%d.%m.%Y"), p["end"].strftime("%d.%m.%Y")))
            for c in p["courses"]:
                # {"teachers": teachers, "homepage": web_addr, "periods": c_periods, "language": c_lang, "start": c_start, "end": c_end, "lectures": lectures, "lecture_hours": hours}
                points = c["points"]
                if points["min"] == points["max"]:
                    points_str = str(points["min"])
                else:
                    points_str = "%d-%d" % (points["min"], points["max"])
                print("  Course %s (%s), %s op/ects:" % (c["code"], c["name"], points_str))
                print("    Teacher(s):")
                for t in c["info"]["teachers"]:
                    if t["email"] is not None:
                        print("      - %s (%s)" % (t["name"], t["email"]))
                    else:
                        print("      - %s" % t["name"])
                print("    Start: %s" % c["info"]["start"].strftime("%d.%m.%Y"))
                print("    End: %s" % c["info"]["end"].strftime("%d.%m.%Y"))
                print("    Period(s): %s" % ", ".join([str(a) for a in c["info"]["periods"]]))
                print("    Language: %s" % c["info"]["language"])
                print("    Homepage: %s" % c["info"]["homepage"])
                print("    Lectures:")
                for l in sorted(c["info"]["lectures"], key=lambda x: x["start"]):   # Sort by dates
                    print("      %s - %s @ %s" % (l["start"].strftime("%a %d.%m.%Y %H:%M"), l["end"].strftime("%H:%M"), l["location"]))
	#!/usr/bin/python3

	import requests
	import re
	import time
	import json
	from argparse import ArgumentParser
	from bs4 import BeautifulSoup as bs4
	from datetime import datetime as dt
	from datetime import timedelta
	from io import StringIO

	def is_period_name(tag):
	return tag.has_attr("id") and tag["id"].lower()[:13] == "opsi_periodi_"

	def get_period_info(p):
	i = p.find(is_period_name)
	if i is None:
	# print("Can't find period info tag")
	return (None, None, None)
	m = re.match(r"^opsi_periodi_(\d)", i["id"].lower())
	if m is None:
	print("Can't parse period id from \"%s\"" % i["id"].lower())
	return (None, None, None)
	p_id = int(m.group(1))

	p_str = i.text.strip().replace("\r", "").replace("\t", "").replace("\n", "")
	m = re.findall(r"\((\d{1,2}\.\d{1,2}\.\d{4}).*?(\d{1,2}\.\d{1,2}\.\d{4})\)", p_str)
	if len(m) == 0:
	print("Can't parse start and end date from \"%s\"" % p_str)
	return (None, None, None)
	p_start = dt.strptime(m[0][0], "%d.%m.%Y")
	p_end = dt.strptime(m[0][1], "%d.%m.%Y")

	return (p_id, p_start, p_end)


	def parse_points(raw):
	raw = raw.lower()
	if '–' in raw or '-' in raw:
	# E.g. "1-3 op" / "1-3 ects"
	m = re.match(r"^(?P<min>\d+)\W(?P<max>\d+)\s(?P<type>(?:op)\|(?:ects))", raw)
	if m is None:
	return False
	p_min = int(m.group("min"))
	p_max = int(m.group("max"))
	p_type = m.group("type")
	return (p_min, p_max, p_type)
	# E.g. "5 op" / "5 ects"
	m = re.match(r"^(?P<amount>\d+)\s(?P<type>(?:op)\|(?:ects))", raw)
	if m is None:
	return False
	amount = int(m.group("amount"))
	p_type = m.group("type")
	return (amount, amount, p_type)

	def json_serial(obj):
	"""JSON serializer for objects not serializable by default json code"""

	if isinstance(obj, dt):
	serial = obj.isoformat()
	return serial
	raise TypeError ("Type not serializable")


	def get_course_info(cid):
	base_url = "https://www10.uta.fi/opas/opetusohjelma/marjapuuro.htm"
	params = {
	"id": cid
	}
	head = {
	"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:48.0) Gecko/20100101 Firefox/48.0"
	}
	r = requests.get(base_url, params=params, headers=head)
	bs = bs4(r.text, "html.parser")

	# Parse period information
	infobox = bs.find("div", class_="marjapuuro_infobox")
	img_tags = infobox.find("div", class_="opsi_toteuma_periodit").find_all("img")
	c_periods = []
	pid = 1
	for s in img_tags:
	if pid > 4:
	break
	if len(re.findall(("periodi%d\." % pid), s["src"])) > 0:
	c_periods.append(pid)
	pid += 1

	# Parse teaching language
	try:
	c_lang = [t for t in bs.find_all("div", class_="infobox_header") if t.get_text().lower() == "opetuskieli" or t.get_text().lower() == "language of instruction"][0].findNextSibling("div", class_="opsi_toteuma_kentta").get_text(strip=True).lower()
	# Crappy coversion to language codes
	if c_lang == "suomi" or c_lang == "finnish":
	c_lang = "fi"
	elif c_lang == "englanti" or c_lang == "english":
	c_lang = "en"
	except:
	c_lang = None


	# Parse teacher information
	teachers = []
	teachers_data = bs.find_all("div", class_="ope")
	for t in teachers_data:
	t_d = [s for s in t.stripped_strings]
	# print("Parsing teacher data: %s" % ", ".join(t_d))
	t_name_role = t_d[0]
	t_name, t_role = [a.strip() for a in t_name_role.split(",")]
	try:
	t_email = t_d[1].replace("[ät]", "@")
	except IndexError:
	t_email = None
	teachers.append({"name": t_name, "role": t_role, "email": t_email})

	# Try to find course homepage address
	web_addr = None
	try:
	"""
	<h2>Opintojakson kotisivu</h2>
	<div class="opsi_toteuma_kentta">
	<a href="http://example.com/">http://example.com/</a>
	</div>
	"""
	web_addr = [t for t in bs.find_all("h2") if t.get_text().lower() == "opintojakson kotisivu" or t.get_text().lower() == "homepage url"][0].findNextSibling("div", class_="opsi_toteuma_kentta").find("a")["href"]
	except:
	pass

	# Try to find course start and end date
	c_start = None
	c_end = None
	"""
	<h2>Opetus</h2>
	<div class="opsi_toteuma_kentta">
	8.9.2016 - 4.11.2016
	</div>
	"""
	t_data = None
	try:
	t_data = [t for t in bs.find_all("h2") if t.get_text().lower() == "opetus" or t.get_text().lower() == "teaching"][0].findNextSibling("div", class_="opsi_toteuma_kentta")
	raw_dates = list(t_data.stripped_strings)[0]
	raw_dates = raw_dates.replace("\r", "").replace("\n", "").replace("\t", "")
	m = re.match(r"^(\d{1,2}\.\d{1,2}\.\d{4}).*?\W(\d{1,2}\.\d{1,2}\.\d{4})", raw_dates)
	if m is None:
	# Try other date format
	m = re.match(r"^(\d{1,2}-.?-\d{4}).?\S(\d{1,2}-.*?-\d{4})", raw_dates)
	if m is None:
	# Give up
	print("Date parsing failed, didn't match known formats: \"%s\"" % raw_dates)
	else:
	# TODO: Fails with systems that use other locale than en_US
	# Use locale.setlocale()
	c_start = dt.strptime(m.group(1), "%d-%b-%Y")
	c_end = dt.strptime(m.group(2), "%d-%b-%Y")
	else:
	c_start = dt.strptime(m.group(1), "%d.%m.%Y")
	c_end = dt.strptime(m.group(2), "%d.%m.%Y")
	except:
	pass

	# Parse teaching info
	hours = -1
	lectures = []

	if t_data is not None:
	for i in t_data.find_all("div", class_="opsi_toteuma_opetustapa"):
	for n in range(0, len(list(i.stripped_strings))):
	possible_name = list(i.stripped_strings)[n].lower()
	if possible_name == "luento-opetus" or possible_name == "lectures":
	# Try to find total teaching hours
	# Most likely the next string contains hours
	possible_hours = list(i.stripped_strings)[n+1].lower()
	# print("Possible hours: %s" % possible_hours)
	m = re.match(r"^(\d+)\s(?:(?:tuntia)\|(?:hours))$", possible_hours)
	hours = -1
	if m is not None:
	# TODO: try-except ValueError, even though that shouldn't happen because of regex \d
	hours = int(m.group(1))
	# print("Hours: %d" % hours)

	# Parse lecture times
	# First one is (always?) teaching dates/times
	try:
	raw_lecture_dates = [list(t.stripped_strings)[0] for t in t_data.find("div", class_="opsi_toteuma_opetustapa").find_all("div", class_="opsi_opetustapa_ajankohta")]

	for p in raw_lecture_dates:
	# print(p)

	# Handle single dates
	m = re.match(r"\w{2}\s(?P<date>\d{1,2}\.\d{1,2}\.\d{4})\sklo\s(?P<startH>\d{1,2})-(?P<endH>\d{1,2}),\s(?P<loc>.*?)$", p)
	if m is None:
	# Try another format
	m = re.match(r"\w{3}\s(?P<date>\d{1,2}-.?-\d{4})\sat\s(?P<startH>\d{1,2})-(?P<endH>\d{1,2}),\s(?P<loc>.?)$", p)

	if m is not None:
	raw_date = m.group("date")
	raw_start = m.group("startH")
	raw_end = m.group("endH")
	raw_loc = m.group("loc")

	try:
	l_date = dt.strptime(raw_date, "%d.%m.%Y")
	except ValueError:
	l_date = dt.strptime(raw_date, "%d-%b-%Y")

	l_start = l_date.replace(hour=int(raw_start))
	l_end = l_date.replace(hour=int(raw_end))
	l_loc = raw_loc
	# TODO: Parse location data more for information
	lectures.append({"start": l_start, "end": l_end, "location": l_loc, "exception": False})

	else:
	# Handle weekly occurences
	m = re.match(r"\w{2}\s(?P<startDate>\d{1,2}\.\d{1,2}\.\d{4})\s-\s(?P<endDate>\d{1,2}\.\d{1,2}\.\d{4})\sviikoittain\sklo\s(?P<startH>\d{1,2})-(?P<endH>\d{1,2}),\s(?P<loc>.*?)$", p)
	if m is None:
	# Try another format
	m = re.match(r"\w{3}\s(?P<startDate>\d{1,2}-.?-\d{4})\s-\s(?P<endDate>\d{1,2}-.?-\d{4})\sweekly\sat\s(?P<startH>\d{1,2})-(?P<endH>\d{1,2}),\s(?P<loc>.*?)$", p)

	if m is not None:
	raw_start_date = m.group("startDate")
	raw_end_date = m.group("endDate")
	raw_start_h = m.group("startH")
	raw_end_h = m.group("endH")
	raw_loc = m.group("loc")

	try:
	l_start_date = dt.strptime(raw_start_date, "%d.%m.%Y")
	except ValueError:
	l_start_date = dt.strptime(raw_start_date, "%d-%b-%Y")
	try:
	l_end_date = dt.strptime(raw_end_date, "%d.%m.%Y")
	except ValueError:
	l_end_date = dt.strptime(raw_end_date, "%d-%b-%Y")
	cur_date = l_start_date

	while cur_date <= l_end_date:
	l_start = cur_date.replace(hour=int(raw_start_h))
	l_end = cur_date.replace(hour=int(raw_end_h))
	lectures.append({"start": l_start, "end": l_end, "location": raw_loc, "exception": False})
	cur_date += timedelta(days=7) # Skip to the next week

	except Exception as e:
	# print(e)
	pass

	# Parse exceptional lecture times
	# NOTE: English date parsing is untested
	try:
	raw_exception_dates = [list(t.stripped_strings)[0] for t in t_data.find("div", class_="opsi_toteuma_opetustapa").find_all("div", class_="opsi_opetustapa_ajankohta_poikkeus")]

	for p in raw_exception_dates:
	p = p.replace("\r", "").replace("\n", "").replace("\t", "")
	# print(p)

	# Handle single dates
	m = re.match(r"(?P<date>\d{1,2}\.\d{1,2}\.\d{4}).?(?:klo\s(?P<startH>\d{1,2}).?\W(?P<endH>\d{1,2}).?)?,\s(?P<loc>.?)$", p)
	if m is None:
	# Try another format
	m = re.match(r"(?P<date>\d{1,2}-.?-\d{4}).?(?:at\s(?P<startH>\d{1,2}).?\W(?P<endH>\d{1,2}),\s(?P<loc>.?)$", p)

	if m is not None:
	raw_date = m.group("date")
	raw_start = m.group("startH")
	raw_end = m.group("endH")
	raw_loc = m.group("loc")

	try:
	l_date = dt.strptime(raw_date, "%d.%m.%Y")
	except ValueError:
	l_date = dt.strptime(raw_date, "%d-%b-%Y")

	# Get possibly existing entry
	ex = None
	ex_id = -1
	for i, l in enumerate(lectures):
	if l["start"].strftime("%Y-%m-%d") == l_date.strftime("%Y-%m-%d"):
	ex = l
	ex_id = i
	break

	if raw_start is not None:
	l_start = l_date.replace(hour=int(raw_start))
	else:
	l_start = l_date
	if ex is not None:
	l_start = l_date.replace(hour=ex["start"].hour)
	if raw_end is not None:
	l_end = l_date.replace(hour=int(raw_end))
	else:
	l_end = l_date
	if ex is not None:
	l_end = l_date.replace(hour=ex["end"].hour)
	l_loc = raw_loc

	if ex is not None:
	# Replace existing lecture information
	lectures[ex_id] = {"start": l_start, "end": l_end, "location": l_loc, "exception": True}
	else:
	lectures.append({"start": l_start, "end": l_end, "location": l_loc, "exception": True})

	# TODO: Handle weekly exceptions?

	except Exception as e:
	# print(e)
	pass

	# TODO: Parse exercise times

	return {"teachers": teachers, "homepage": web_addr, "periods": c_periods, "language": c_lang, "start": c_start, "end": c_end, "lectures": lectures, "lecture_hours": hours}


	if __name__ == "__main__":

	parser = ArgumentParser(description="UTA course information parser", epilog="If no flags are specified, defaults to all courses text output to standard output.")
	parser.add_argument("-c", "--courses", type=str, dest="courses", help="Course codes (separated with a comma)")
	parser.add_argument("-j", "--json", dest="json", action="store_true", help="Output JSON")
	parser.add_argument("-i", "--ical", dest="ical", action="store_true", help="Output iCalendar")
	parser.add_argument("-d", "--dest", dest="dest", type=str, help="Output to file instead of standard output")

	args = parser.parse_args()

	what_courses = []
	if args.courses is not None:
	what_courses = args.courses.split(",")

	base_url = "https://www10.uta.fi/opas/opetusohjelma/opetusohjelma.htm"
	params = {
	"kieli": "fi",
	"ots": 15, # 15 CS BSc, 16 CS MSc
	"lvv": 2016,
	"ops": 142
	# To get all information in the same page, use:
	# "display_long": "true"
	# I'm not using that for now, as it makes parsing a bit tedious
	}
	head = {
	"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:48.0) Gecko/20100101 Firefox/48.0"
	}

	# r = requests.get(base_url, params=params, headers=head)
	# bs = bs4(r.text, "html.parser")

	# Use cached page for testing
	bs = None
	with open("uta_marjapuuro.html", "r") as f:
	bs = bs4(f.read(), "html.parser")

	known_courses = []

	# Periods
	periods_data = bs.find_all("div", class_="opsi_periodi")
	periods = []

	for p in periods_data:

	# Get period ID
	p_id, start, end = get_period_info(p)
	if p_id is None:
	# print("Period information parsing failed!")
	continue

	# Courses
	courses_data = p.find_all("div", class_="opsi_opintojakso")
	courses = []

	for c in courses_data:
	# Course
	code = c.find("span", class_="opsi_opintojakso_koodi").text.strip()
	if len(code) == 0:
	# Don't add entries that don't have a code
	continue
	"""if code in known_courses:
	# Skip
	continue
	known_courses.append(code)"""

	# Debug
	"""if code != "MTTTP1":
	continue"""
	# print(code)
	if len(what_courses) > 0 and code not in what_courses:
	continue

	name_tag = c.find("span", class_="opsi_opintojakso_nimi")
	name = name_tag.text.strip()
	c_id = int(name_tag.find("a")["href"].split("id=")[1])
	points_raw = c.find("span", class_="opsi_opintojakso_laajuus").text.strip()
	points = parse_points(points_raw)
	if not points:
	# Parsing failed
	# print("Information parsing failed for course %s (%s)!" % (code, name))
	points = (-1, -1, "")
	points_struct = {"min": points[0], "max": points[1], "type": points[2]}

	c_info = get_course_info(c_id)

	courses.append({"id": c_id, "code": code, "name": name, "points": points_struct, "info": c_info})

	periods.append({"id": p_id, "start": start, "end": end, "courses": courses})
	break

	# TODO: Timestamp as milliseconds, because javascript (just *1000)
	data = {"timestamp": int(time.time()), "periods": periods}

	if args.json:
	data["timestamp"] = data["timestamp"] * 1000 # JS likes timestamps as milliseconds
	json_io = StringIO()
	json.dump(data, json_io, default=json_serial)
	json_data = json_io.getvalue()

	if args.dest is None:
	print(json_data)
	else:
	try:
	with open(args.dest, "w") as f:
	f.write(json_data)
	except IOError as e:
	print("JSON file writing failed: %s" % e)

	elif args.ical:
	# TODO
	pass

	else:
	# Stdout
	print("----- UTA course information -----")
	for p in data["periods"]:
	print("Period %d, %s - %s" % (p["id"], p["start"].strftime("%d.%m.%Y"), p["end"].strftime("%d.%m.%Y")))
	for c in p["courses"]:
	# {"teachers": teachers, "homepage": web_addr, "periods": c_periods, "language": c_lang, "start": c_start, "end": c_end, "lectures": lectures, "lecture_hours": hours}
	points = c["points"]
	if points["min"] == points["max"]:
	points_str = str(points["min"])
	else:
	points_str = "%d-%d" % (points["min"], points["max"])
	print(" Course %s (%s), %s op/ects:" % (c["code"], c["name"], points_str))
	print(" Teacher(s):")
	for t in c["info"]["teachers"]:
	if t["email"] is not None:
	print(" - %s (%s)" % (t["name"], t["email"]))
	else:
	print(" - %s" % t["name"])
	print(" Start: %s" % c["info"]["start"].strftime("%d.%m.%Y"))
	print(" End: %s" % c["info"]["end"].strftime("%d.%m.%Y"))
	print(" Period(s): %s" % ", ".join([str(a) for a in c["info"]["periods"]]))
	print(" Language: %s" % c["info"]["language"])
	print(" Homepage: %s" % c["info"]["homepage"])
	print(" Lectures:")
	for l in sorted(c["info"]["lectures"], key=lambda x: x["start"]): # Sort by dates
	print(" %s - %s @ %s" % (l["start"].strftime("%a %d.%m.%Y %H:%M"), l["end"].strftime("%H:%M"), l["location"]))