laramiel/wwu_schedule.py

## wwu_schedule.py
#!/usr/bin/env python3
#
'''
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
WWU Schedule Generator
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This requires python; it's probably easiest to run on linux or WSL.
First install the prerequisites:

$ pip3 install BeautifulSoup4 pandas html5lib ortools lxml

Then execute the script using python3:

$ python3 wwu_schedule.py 'CSCI 145' 'ENG 101' 'PHYS 161'


The script will attempt to generate all possible schedules for each of
the listed classes.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Options include:

--term indicates the term. This should be the year followed by the term code:

  Winter  10
  Spring  20
  Summer  30
  Fall    40

--year <academic year>  Specify the academic year.
  This should be the last two digits of the two consecutive years in which
  the term falls, such as '2324'.

--limit <number>  Number of schedules printed.
  By default prints all possible schedules.

--include (-i) <section>
  Includes the specific class section (course number).

--exclude (-e) <section>
  Excludes the specific class section (course number).


--preferred-time  <time spec>
  Sets a preferred time range used to score schedules.
  Classes starting before or ending after this range are penalized.

  Example: "9:00-3:30 pm"

-a, -b, -c, -d  <course>
  Out of courses specified by -a, only one section will be scheduled.
  Likewise for -b/-c/-d.


The simplest schedule generation might be for two classes, like:

$ python wwu_schedule.py ENG_101 MATH_125  --limit 0


If more class selection is useful, increase the limit:


$ python wwu_schedule.py ENG_101 MATH_125  --limit 5


An even more complex invocation, which prefers 10:00 am classes
and needs to exclude some sections, might look like this:

$ python wwu_schedule.py PHYS_162 MATH_125 ENG_201 \
    --preferred-time "10:00-2:15 pm"  \
    -i 10655 -e 10232 --limit 3


To generate a class list with the following combination of classes:
   PHYS_163
   HIST_112
   Any of CSCI 241, 247, or 301
   Any of COMM 224 or 235

$ python wwu_schedule.py PHYS_163 HIST_112 \
    -a CSCI_241 CSCI_301 CSCI_247 \
    -b COMM_235 -b COMM_224


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
'''

import requests
import re
import pandas as pd
import lxml
import html5lib
import datetime as dt
import argparse
import sys
import io

from ortools.sat.python import cp_model

from bs4 import BeautifulSoup

from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

# Timetable endpoint; course_list() POSTs the term, academic year and subject
# to it.
URI = 'https://web4u.banner.wwu.edu/pls/wwis/wwskcfnd.TimeTable'

# WWU courses are identified by a 2- to 4-character department code (mostly
# letters; a few contain '/' or a digit, e.g. 'A/HI', 'C2C') followed by
# whitespace and a 3 digit catalog number.
# Group 1 is the department code and group 2 the catalog number; the number
# must be followed by whitespace or the end of the string.
COURSE_RE = re.compile(
    r'('
    r'ACCT|AHE|AMST|ASLC|ANTH|ARAB|ART|A/HI|AECI|AUAP|ASTR|BNS|BIOL|BUS|C/AM|CHEM|CHIN|'
    r'CLST|CD|CSEC|CSD|COMM|C2C|CSCI|CISS|DISA|DNC|DATA|DSCI|DSGN|DIAD|ECE|EAST|ECON|EDUC|'
    r'EDAD|ESJ|EECE|ELED|ENRG|ENGR|ENG|ENTR|ESCI|ENVS|EUS|EXCE|FAIR|FIN|FREN|GEOL|GERM|GRAD|'
    r'GREK|HLED|HIST|HGST|HNRS|HRM|HSP|HUMA|ID|IT|IEP|IBUS|INTL|ITAL|JAPN|JOUR|KIN|LAT|LDST|'
    r'LIBR|LING|MGMT|MIS|MFGE|MACS|MKTG|MBA|MPAC|MSCI|MATH|M/CS|LANG|MDS|MLE|MUS|NURS|OPS|'
    r'PHIL|PA|PE|PEH|PHYS|PLSC|PME|PORT|PSY|RECR|RC|REL|RUSS|SALI|SCED|SEC|SMNR|SOC|SPAN|SPED|'
    r'SUST|TEOP|TESL|THTR|UEPP|WGSS'
    r')\s+(\d\d\d)(\s|$)')

# Column headers of a schedule row, in positional order.  The numbered
# comments are the integer column labels the rest of this file uses to index
# the parsed DataFrames (e.g. df[1] is the Crn, df[9] the Avail count).
HDRS = [
    'Term',
    'Crn',  # 1 (section)
    'Days',  # 2
    'Time',  # 3
    'Instructor',
    'Room',
    'Addl Fees',
    'Cap',  # 7
    'Enrl',  # 8
    'Avail',  # 9
    'Waitlist',
    'Restrictions',
    'Attributes'
]


def _get_session():
    """Create a requests session whose HTTP and HTTPS requests are retried.

    Connection and read failures are each retried up to 10 times with a
    backoff factor of 0.2.

    Returns:
      A new requests.Session with the retrying adapter mounted.
    """
    retry_policy = Retry(connect=10, read=10, backoff_factor=0.2)
    adapter = HTTPAdapter(max_retries=retry_policy)
    session = requests.Session()
    for scheme in ('http://', 'https://'):
        session.mount(scheme, adapter)
    return session


def _to_str(x):
    """Return `x` as a string, with NaN rendered as the empty string.

    Strings are returned unchanged.  Any other value is converted with str()
    and every occurrence of 'nan' in the result is removed, so a float NaN
    becomes ''.
    """
    return x if isinstance(x, str) else str(x).replace('nan', '')


# Matches 'H:MM-H:MM' followed by an optional am/pm marker.
_TIME_RANGE_RE = re.compile(
    r'\s*(\d{1,2}):(\d{1,2})\s*-\s*(\d{1,2}):(\d{1,2})\s*(am|pm)?')


def class_time_to_minute_intervals(input):
    """Convert a class time range into (start, end) minutes after midnight.

    The range is written as in the timetable, e.g. '11:00-11:50 am' or
    '10:00-2:15 pm': a single am/pm marker follows the end time.  With a 'pm'
    marker the end time is in the afternoon; the start time is also treated
    as pm when that still places it before the end ('04:00-05:50 pm'), and as
    am otherwise ('10:00-2:15 pm').

    Args:
      input: the time range string; case and surrounding whitespace are
        ignored.  (The name shadows the builtin but is kept for callers.)

    Returns:
      A (start, end) tuple of ints, in minutes after midnight.

    Raises:
      ValueError: if the string does not start with an 'H:MM-H:MM' range.
    """
    match = _TIME_RANGE_RE.match(input.strip().lower())
    if match is None:
        raise ValueError(f'Unrecognized time range: {input!r}')
    start_h, start_m, end_h, end_m = (int(g) for g in match.groups()[:4])
    start = start_h * 60 + start_m
    end = end_h * 60 + end_m
    if match.group(5) == 'pm':
        half_day = 12 * 60
        # 12:xx pm is already in 24-hour form; every other pm hour shifts.
        if end_h < 12:
            end += half_day
        # The marker belongs to the end time, so the start is only moved to
        # the afternoon when it stays before the end.
        if start_h < 12 and start + half_day < end:
            start += half_day
    return (start, end)


class SchedulePrinter(cp_model.CpSolverSolutionCallback):
    """Solution callback that collects schedules and prints them by score."""

    def __init__(self, courses, course_vars, objective, limit):
        """Store the model data needed to render each solution.

        Args:
          courses: dict[course][section][dayspec] = timespec.
          course_vars: dict mapping (course, section) to its solver variable.
          objective: expression whose value is the score of a schedule.
          limit: maximum number of schedules print_all() prints; a falsy
            value (None or 0) prints all of them.
        """
        cp_model.CpSolverSolutionCallback.__init__(self)
        self.__courses = courses
        self.__course_vars = course_vars
        self.__objective = objective
        self.__limit = limit
        self.__solution_count = 0
        self.__solutions = []

    def solution_count(self):
        """Return how many solutions the solver has reported so far."""
        return self.__solution_count

    def on_solution_callback(self):
        """Record the score and the text of the solution just found."""
        self.__solution_count += 1
        score = self.Value(self.__objective)
        parts = []
        for (course, section), chosen in self.__course_vars.items():
            if not self.Value(chosen):
                continue
            parts.append(f'\n{course}  section {section}\n')
            for days, times in self.__courses[course][section].items():
                # Right-align the day letters in a 6 character column.
                parts.append(f'{days.rjust(6)[-6:]} {times}\n')
        self.__solutions.append((score, ''.join(parts)))

    def print_all(self):
        """Print the recorded schedules, lowest score first, up to the limit."""
        rule = '~' * 40
        print(rule)
        print(f'Possible schedules ({self.__solution_count})')
        ranked = sorted(self.__solutions, key=lambda entry: entry[0])
        for printed, (score, text) in enumerate(ranked, start=1):
            print(rule)
            print(f'Score: {score}\n{text}')
            if self.__limit and printed >= self.__limit:
                break


def generate_schedule(courses, course_groups, preferred_time, limit):
    """Enumerate and print conflict-free schedules with the CP-SAT solver.

    Uses the Google ortools constraint solver.  See:
      https://developers.google.com/optimization/reference/python/index_python
      http://www.hakank.org/google_or_tools/

    Args:
      courses: dict[class][section][dayspec] = timespec, like:
        {
           'CSCI 145' : {
              '44138': { 'MWF': '11:00-11:50 am', 'T': '12:00-01:50 pm'},
              '41690': { 'MWF': '10:00-10:50 am', 'W': '12:00-01:50 pm'}},

           'PHYS 161' : {
              '41569': { 'MWRF': '11:30-12:50 pm', 'T': '04:00-05:50 pm' }},
        }
      course_groups: dict mapping a group key to the set of course names in
        that group.  Exactly one section is chosen per group, taken from any
        of the group's courses that appear in `courses`.
      preferred_time: optional time range string used for scoring; when
        falsy the window 9:30 - 3:15 is used.
      limit: maximum number of schedules printed (None or 0 prints all).  A
        limit of exactly 0 additionally asks the solver to minimize the
        score.

    Returns:
      The number of solutions reported by the solver.
    """
    # Preferred window in minutes after midnight; default is 9:30 - 3:15.
    p_time = (570, 915)
    if preferred_time:
        p_time = class_time_to_minute_intervals(preferred_time)

    model = cp_model.CpModel()
    course_vars = {}        # (course, section) -> boolean "selected" variable
    interval_vars = {}      # day letter -> optional intervals on that day
    course_group_vars = {}  # group key -> booleans of all its sections
    objective = 0

    for course, sections in courses.items():
        c = course.lower().replace(' ', '_')

        section_vars = []
        for section, times in sections.items():
            # A boolean variable for each (class, section), indicating the
            # section that is chosen.
            var = model.NewBoolVar(f'selected_{c}_{section}')
            course_vars[(course, section)] = var
            section_vars.append(var)

            # An optional interval variable for each (class, section, day)
            # covering the class time and controlled by the section boolean.
            for dayspec, timespec in times.items():
                intervals = class_time_to_minute_intervals(timespec)
                interval_size = intervals[1] - intervals[0]
                for d in dayspec:
                    if d.isspace():
                        continue
                    intvar = model.NewOptionalIntervalVar(
                        intervals[0], interval_size, intervals[1], var,
                        f'time_{c}_{section}_{d}')
                    # setdefault accepts any day letter, not only MTWRF.
                    interval_vars.setdefault(d, []).append(intvar)
                    # Scoring (lower is better): each class session counts
                    # as 1; each minute outside the preferred window counts
                    # as 1, with a minimum of 2 per side.
                    objective += var
                    if intervals[0] < p_time[0]:
                        objective += max(2, p_time[0] - intervals[0]) * var
                    if intervals[1] > p_time[1]:
                        objective += max(2, intervals[1] - p_time[1]) * var

        # Collect this course's sections under every group that lists it.
        for opt_k, opt_s in course_groups.items():
            if course in opt_s:
                course_group_vars.setdefault(opt_k, []).extend(section_vars)

    # Exactly one section is selected from each group (sum of booleans = 1).
    for opt_s in course_group_vars.values():
        model.Add(sum(opt_s) == 1)

    # A NoOverlap constraint per day over all the optional intervals.
    for v in interval_vars.values():
        model.AddNoOverlap(v)

    if limit == 0:
        model.Minimize(objective)

    # Now solve; the printer callback records every reported solution.
    printer = SchedulePrinter(courses, course_vars, objective, limit)
    solver = cp_model.CpSolver()
    solver.parameters.enumerate_all_solutions = True
    solver.Solve(model, printer)
    printer.print_all()
    return printer.solution_count()


def _table_to_frame(table):
    """Parse one <table> element into a DataFrame; None if it is unusable."""
    try:
        frames = pd.read_html(io.StringIO(str(table)), thousands=None)
    except Exception:
        # Deliberate best effort: tables pandas cannot parse are skipped.
        return None
    if len(frames) != 1 or frames[0].empty:
        return None
    return frames[0]


def course_list(curr_yr, term, subject):
    """Download and parse the timetable of one subject.

    Reads the course list from the web by POSTing to URI
    (https://web4u.banner.wwu.edu/pls/wwis/wwskcfnd.TimeTable) with the form
    fields:

      <select id='term' name='term'>     format <YEAR><TERM>, default 'All'
      <INPUT TYPE='hidden' NAME='curr_yr' VALUE='2324'>
      <select id='subj' name='subj'>

    Like most HTML parsing, this is a hack, as the webpage uses tables for
    formatting.  Each top-level table is parsed separately with bs4 and
    converted to a pandas table.  A table with about as many columns as HDRS
    is treated as a schedule row; any other table is checked for a course
    heading, which names the course the following rows belong to.

    Args:
      curr_yr: academic year code, e.g. '2324'.
      term: term code, <YEAR><TERM>.
      subject: department code, e.g. 'CSCI'.

    Returns:
      dict mapping 'DEPT NNN' to a DataFrame with integer column labels in
      HDRS order and string values, one row per listed meeting time.

    Raises:
      requests.HTTPError: if the server answers with an error status.
    """
    with _get_session() as session:
        r = session.post(URI, data={
            'term': term,
            'curr_yr': curr_yr,
            'subj': subject,
        })
    r.raise_for_status()

    result = {}
    soup = BeautifulSoup(r.text, features='lxml')
    key = ''  # course that the schedule rows currently being read belong to
    for table in soup.find_all('table'):
        if table.find_parent('table') is not None:
            continue  # nested tables are parsed as part of their parent
        df = _table_to_frame(table)
        if df is None:
            continue

        ncols = len(df.columns)
        if not (len(HDRS) - 1 <= ncols <= len(HDRS)):
            # This 'table' is likely to be merely a formatting construct in
            # the webpage.  It may be a class header row, which includes the
            # class name, description, etc.; detect this with COURSE_RE.
            heading = str(df[0][0]).strip()
            m = COURSE_RE.match(heading)
            if m and heading.startswith(subject):
                key = f'{m.group(1).strip()} {m.group(2)}'
            else:
                key = ''
            continue

        # This 'table' is likely a row in the course schedule (the site puts
        # each row in a separate table element).  Lab/additional sections
        # may have fewer html columns.
        if df[0][0] == HDRS[0]:
            continue  # header row
        if not key:
            continue  # schedule row outside any recognized course

        # Convert float -> empty string values.
        for x in range(ncols):
            df[x] = df[x].apply(_to_str)

        if key not in result:
            result[key] = df
            continue

        existing = result[key]
        if ncols < len(HDRS):
            # This is something like a lab section, or alternate meeting
            # time on other days.  Fill in the remaining columns with empty
            # string rather than NaN.
            while len(df.columns) < len(HDRS):
                df[len(df.columns)] = ''

            # Copy the course number (1) and availability (7, 8, 9) from the
            # previous row of this course.
            previous = existing.iloc[-1]
            for col in (1, 7, 8, 9):
                df[col] = previous[col]

        result[key] = pd.concat([existing, df], ignore_index=True)
    return result


def filter_to_requested(courses, requested_classes, missing):
    """Pick the requested courses out of the downloaded course tables.

    Args:
      courses: dict mapping course name to its schedule table.
      requested_classes: iterable of course names to look up.
      missing: list that receives, in order, each requested name that is not
        a key of `courses`.

    Returns:
      A new dict holding the `courses` entry of every requested name found.
    """
    found = {}
    for name in requested_classes:
        if name in courses:
            found[name] = courses[name]
        else:
            missing.append(name)
    return found


def filter_to_available(courses, include, exclude, missing):
    """Reduce each course table to the sections that can be scheduled.

    For every course, the rows whose Days column is 'TBD' and the rows of
    excluded sections are dropped.  Of what remains, only sections with open
    seats (Avail column greater than zero) are kept, unless the section is
    listed in `include` or `include` contains 'all'.

    Args:
      courses: dict mapping course name to a DataFrame with integer column
        labels in HDRS order, holding string values.
      include: list of section numbers (Crn strings) to keep even when they
        have no space; the special value 'all' keeps every remaining section.
      exclude: list of section numbers (Crn strings) to drop.
      missing: list that receives the name of every course left without a
        usable section.

    Returns:
      dict mapping course name to its filtered DataFrame; courses without a
      usable section are omitted.
    """
    available = {}
    keep_all = 'all' in include
    for x, df in courses.items():
        unique = df[1].unique()

        # Remove 'TBD' meeting rows, then the excluded sections.
        df = df[df[2] != 'TBD']
        df = df[~df[1].isin(list(exclude))]
        if df.empty:
            # Report the course rather than dropping it silently.
            missing.append(x)
            continue

        if keep_all:
            available[x] = df
            continue

        # Filter by available slots.  The columns hold strings, so convert
        # before comparing; blank or non-numeric counts become NaN and are
        # treated as full.
        open_seats = pd.to_numeric(df[9], errors='coerce')
        z = df[open_seats > 0]
        # Re-add explicitly included sections that the filter removed.
        for i in include:
            if i in unique and i not in z[1].unique():
                z = pd.concat([z, df[df[1] == i]], ignore_index=True)
        if z.empty:
            missing.append(x)
            continue
        available[x] = z
    return available


def run_class_scheduler(args, course_groups, required):
    """Fetch the requested courses and print the schedules that fit.

    Args:
      args: parsed command line options; reads `year`, `term`, `include`,
        `exclude`, `preferred_time` and `limit`.
      course_groups: dict mapping a group key to the set of course names of
        which one is to be scheduled.
      required: collection of course names that must be available; if any of
        them has no available section, nothing is scheduled.
    """
    rule = '-' * 40
    print(rule)
    print(f'Academic year {args.year} quarter {args.term}')
    for it in course_groups.values():
        print('One course from : ' + ' '.join(sorted(it)))
    print(rule)

    requested_classes = []
    for group in course_groups.values():
        requested_classes.extend(group)

    # Reads the courses from WWU for the requested_classes and term, one
    # request per department code.
    subjects = set()
    for x in requested_classes:
        m = COURSE_RE.match(x)
        if m:
            subjects.add(m.group(1))

    courses = {}
    for x in sorted(subjects):
        c = course_list(args.year, args.term, x)
        print(f'Course list for {x} has {len(c)} courses')
        courses.update(c)

    missing = []
    courses = filter_to_requested(courses, requested_classes, missing)
    for c, df in courses.items():
        print(rule)
        print(c)
        print(df)

    available = filter_to_available(courses, args.include, args.exclude, missing)
    if missing:
        print(rule)
        print(
            f'Courses {",".join(missing)} have no available sections; schedule may be incomplete')
        # If any of the "required" classes are unavailable, exit early.
        if any(x in required for x in missing):
            return

    # Transform the dataframe to the dict used by generate_schedule:
    # transformed[course][section][days] = time, from columns 2 and 3.
    transformed = {}
    for k, df in available.items():
        for section in df[1].unique():
            slots = transformed.setdefault(k, {}).setdefault(section, {})
            for _, row in df[df[1] == section].iterrows():
                week_spec = str(row[2]).strip()
                timespec = str(row[3]).strip()
                if not week_spec or not timespec:
                    # No meeting time listed in this row; there is nothing
                    # to place on the calendar (and nothing to parse).
                    continue
                slots[week_spec] = timespec

    schedules = generate_schedule(transformed, course_groups, args.preferred_time, args.limit)

    print('~'*40)
    print( f'Generated {schedules} possible schedules for selected courses.')
    print()


def _default_year_and_term(today):
    """Return the default (academic year code, term code) for date `today`."""
    # Determine the academic year for the scheduler, which is e.g. 2324.
    # A new academic year is assumed from May 1.
    academic_year = today.year
    if today.month < 5:
        academic_year -= 1
    default_year = '%02d%02d' % (academic_year % 100, (academic_year + 1) % 100)

    # Determine the term (quarter) for the scheduler.
    # Term is composed of 'YYYY' + Suffix:
    # May 1: Beginning of registration for FALL: 40
    # Nov 1: Beginning of registration for WINTER: 10
    # Feb 15: Beginning of registration for SPRING: 20
    # Summer (30) is never auto-selected.
    if today >= dt.date(year=academic_year + 1, month=2, day=15):
        default_term = '%04d20' % (academic_year + 1,)
    elif today >= dt.date(year=academic_year, month=11, day=1):
        default_term = '%04d10' % (academic_year + 1,)
    else:
        default_term = '%04d40' % (academic_year,)
    return default_year, default_term


def main(argv):
    """Parse the command line and run the scheduler.

    Options known to argparse are parsed first.  Every remaining argument is
    either a course name (underscores stand for spaces) or a single-dash
    group flag such as -a or -b.  Courses given before any flag are required
    and each forms a group of its own; courses following a flag share one
    group, of which one course is scheduled.

    Args:
      argv: the full argument vector, program name first.

    Returns:
      0 after running the scheduler, 1 if the courses were specified
      incorrectly.
    """
    default_year, default_term = _default_year_and_term(dt.date.today())

    parser = argparse.ArgumentParser(
        prog=argv[0], description='Attempt to generate WWU schedule.')
    parser.add_argument('-t', '--term', type=str, nargs='?', default=default_term,
                        help="YYYYTT, where YYYY is the year, and TT is the term (Winter=10, Fall=40).")
    parser.add_argument('-y', '--year', type=str, nargs='?', default=default_year,
                        help="School year. For example, --year=2324.")
    parser.add_argument('-i', '--include', action='append', default=[],
                        help="Include these sections (course numbers) even they have no space.")
    parser.add_argument('-e', '--exclude', action='append', default=[],
                        help="Exclude these sections (course numbers).")
    parser.add_argument('--preferred-time', type=str, default=None,
                        help="Prefer classes which fall within this time range.  Example: '10:00-3:30 pm'")
    parser.add_argument('--limit', default=None, type=int,
                        help="Print the N best scores.")

    args, unknown = parser.parse_known_args(argv[1:])
    failed = False

    course_groups = {}
    required = set()
    mode = '-required'
    groupnum = 0
    for x in unknown:
        if x.startswith('-'):
            x = x.lower()
            # A long option typed with a single dash is not recognized by
            # argparse and ends up here.
            if x in ('-limit', '-term', '-year', '-include', '-exclude', '-preferred-time'):
                print(f'Argument should use --; -{x}', file=sys.stderr)
                failed = True
            # A different flag starts a new group; repeating the same flag
            # keeps adding to the current one.
            if x != mode and x != '-oneof':
                groupnum += 1
            mode = x
            continue
        course = x.upper().replace('_', ' ')
        if mode.startswith('-r'):
            # Every required course gets a group of its own.
            groupnum += 1
            required.add(course)
        course_groups.setdefault(f'{mode}-{groupnum}', set()).add(course)

    if failed or not course_groups:
        print('Error specifying courses.', file=sys.stderr)
        print('Example: ', file=sys.stderr)
        print(f'  python3 {argv[0]} ENG_101 PHYS_161 HIST_112', file=sys.stderr)
        print('', file=sys.stderr)
        return 1

    run_class_scheduler(args, course_groups, required)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
	#!/usr/bin/env python3
	#
	'''
	~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	WWU Schedule Generator
	~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

	This requires python; it's probably easiest to run on linux or WSL.
	First install the prerequisites:

	$ pip3 install BeautifulSoup4 pandas html5lib ortools lxml

	Then execute the script using python3:

	$ python3 wwu_schedule.py 'CSCI 145' 'ENG 101' 'PHYS 161'


	The script will attempt to generate all possible schedules for each of
	the listed classes.

	~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

	Options include:

	--term indicates the term. This should be the year followed by the term code:

	Winter 10
	Spring 20
	Summer 30
	Fall 40

	--year <academic year> Specify the academic year.
	This should be the last two digits of the two consecutive years in which
	the term falls, such as '2324'.

	--limit <number> Number of schedules printed.
	By default prints all possible schedules.

	--include (-i) <section>
	Includes the specific class section (course number).

	--exclude (-e) <section>
	Excludes the specific class section (course number).


	--preferred-time <time spec>
	Sets a preferred time range used to score schedules.
	Classes starting before or ending after this range are penalized.

	Example: "9:00-3:30 pm"

	-a, -b, -c, -d <course>
	Out of courses specified by -a, only one section will be scheduled.
	Likewise for -b/-c/-d.


	The simplest schedule generation might be for two classes, like:

	$ python wwu_schedule.py ENG_101 MATH_125 --limit 0


	If more class selection is useful, increase the limit:


	$ python wwu_schedule.py ENG_101 MATH_125 --limit 5


	An even more complex invocation, which prefers 10:00 am classes
	and needs to exclude some sections, might look like this:

	$ python wwu_schedule.py PHYS_162 MATH_125 ENG_201 \
	--preferred-time "10:00-2:15 pm" \
	-i 10655 -e 10232 --limit 3


	To generate a class list with the following combination of classes:
	PHYS_163
	HIST_112
	Any of CSCI 241, 247, or 301
	Any of COMM 224 or 235

	$ python wwu_schedule.py PHYS_163 HIST_112 \
	-a CSCI_241 CSCI_301 CSCI_247 \
	-b COMM_235 -b COMM_224


	~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	'''

	import requests
	import re
	import pandas as pd
	import lxml
	import html5lib
	import datetime as dt
	import argparse
	import sys
	import io

	from ortools.sat.python import cp_model

	from bs4 import BeautifulSoup

	from requests.adapters import HTTPAdapter
	from requests.packages.urllib3.util.retry import Retry

	# Timetable endpoint; course_list() POSTs the term, academic year and
	# subject to it.
	URI = 'https://web4u.banner.wwu.edu/pls/wwis/wwskcfnd.TimeTable'

	# WWU courses are identified by a 2- to 4-character department code (mostly
	# letters; a few contain '/' or a digit, e.g. 'A/HI', 'C2C') followed by
	# whitespace and a 3 digit catalog number.
	# Group 1 is the department code and group 2 the catalog number; the number
	# must be followed by whitespace or the end of the string.
	# The alternatives are separated by a plain '|': an escaped '\|' would
	# match a literal pipe character instead of acting as alternation.
	COURSE_RE = re.compile(
	    r'('
	    r'ACCT|AHE|AMST|ASLC|ANTH|ARAB|ART|A/HI|AECI|AUAP|ASTR|BNS|BIOL|BUS|C/AM|CHEM|CHIN|'
	    r'CLST|CD|CSEC|CSD|COMM|C2C|CSCI|CISS|DISA|DNC|DATA|DSCI|DSGN|DIAD|ECE|EAST|ECON|EDUC|'
	    r'EDAD|ESJ|EECE|ELED|ENRG|ENGR|ENG|ENTR|ESCI|ENVS|EUS|EXCE|FAIR|FIN|FREN|GEOL|GERM|GRAD|'
	    r'GREK|HLED|HIST|HGST|HNRS|HRM|HSP|HUMA|ID|IT|IEP|IBUS|INTL|ITAL|JAPN|JOUR|KIN|LAT|LDST|'
	    r'LIBR|LING|MGMT|MIS|MFGE|MACS|MKTG|MBA|MPAC|MSCI|MATH|M/CS|LANG|MDS|MLE|MUS|NURS|OPS|'
	    r'PHIL|PA|PE|PEH|PHYS|PLSC|PME|PORT|PSY|RECR|RC|REL|RUSS|SALI|SCED|SEC|SMNR|SOC|SPAN|SPED|'
	    r'SUST|TEOP|TESL|THTR|UEPP|WGSS'
	    r')\s+(\d\d\d)(\s|$)')

	# Column headers of a schedule row, in positional order.  The numbered
	# comments are the integer column labels the rest of this file uses to
	# index the parsed DataFrames (e.g. df[1] is the Crn, df[9] the Avail count).
	HDRS = [
	    'Term',
	    'Crn',  # 1 (section)
	    'Days',  # 2
	    'Time',  # 3
	    'Instructor',
	    'Room',
	    'Addl Fees',
	    'Cap',  # 7
	    'Enrl',  # 8
	    'Avail',  # 9
	    'Waitlist',
	    'Restrictions',
	    'Attributes'
	]


	def _get_session():
	    """Create a requests session whose HTTP and HTTPS requests are retried.

	    Connection and read failures are each retried up to 10 times with a
	    backoff factor of 0.2.

	    Returns:
	      A new requests.Session with the retrying adapter mounted.
	    """
	    retry_policy = Retry(connect=10, read=10, backoff_factor=0.2)
	    adapter = HTTPAdapter(max_retries=retry_policy)
	    session = requests.Session()
	    for scheme in ('http://', 'https://'):
	        session.mount(scheme, adapter)
	    return session


	def _to_str(x):
	    """Return `x` as a string, with NaN rendered as the empty string.

	    Strings are returned unchanged.  Any other value is converted with str()
	    and every occurrence of 'nan' in the result is removed, so a float NaN
	    becomes ''.
	    """
	    return x if isinstance(x, str) else str(x).replace('nan', '')


	# Matches 'H:MM-H:MM' followed by an optional am/pm marker.
	_TIME_RANGE_RE = re.compile(
	    r'\s*(\d{1,2}):(\d{1,2})\s*-\s*(\d{1,2}):(\d{1,2})\s*(am|pm)?')


	def class_time_to_minute_intervals(input):
	    """Convert a class time range into (start, end) minutes after midnight.

	    The range is written as in the timetable, e.g. '11:00-11:50 am' or
	    '10:00-2:15 pm': a single am/pm marker follows the end time.  With a 'pm'
	    marker the end time is in the afternoon; the start time is also treated
	    as pm when that still places it before the end ('04:00-05:50 pm'), and as
	    am otherwise ('10:00-2:15 pm').

	    Args:
	      input: the time range string; case and surrounding whitespace are
	        ignored.  (The name shadows the builtin but is kept for callers.)

	    Returns:
	      A (start, end) tuple of ints, in minutes after midnight.

	    Raises:
	      ValueError: if the string does not start with an 'H:MM-H:MM' range.
	    """
	    match = _TIME_RANGE_RE.match(input.strip().lower())
	    if match is None:
	        raise ValueError(f'Unrecognized time range: {input!r}')
	    start_h, start_m, end_h, end_m = (int(g) for g in match.groups()[:4])
	    start = start_h * 60 + start_m
	    end = end_h * 60 + end_m
	    if match.group(5) == 'pm':
	        half_day = 12 * 60
	        # 12:xx pm is already in 24-hour form; every other pm hour shifts.
	        if end_h < 12:
	            end += half_day
	        # The marker belongs to the end time, so the start is only moved to
	        # the afternoon when it stays before the end.
	        if start_h < 12 and start + half_day < end:
	            start += half_day
	    return (start, end)


	class SchedulePrinter(cp_model.CpSolverSolutionCallback):
	    """Solution callback that collects schedules and prints them by score."""

	    def __init__(self, courses, course_vars, objective, limit):
	        """Store the model data needed to render each solution.

	        Args:
	          courses: dict[course][section][dayspec] = timespec.
	          course_vars: dict mapping (course, section) to its solver variable.
	          objective: expression whose value is the score of a schedule.
	          limit: maximum number of schedules print_all() prints; a falsy
	            value (None or 0) prints all of them.
	        """
	        cp_model.CpSolverSolutionCallback.__init__(self)
	        self.__courses = courses
	        self.__course_vars = course_vars
	        self.__objective = objective
	        self.__limit = limit
	        self.__solution_count = 0
	        self.__solutions = []

	    def solution_count(self):
	        """Return how many solutions the solver has reported so far."""
	        return self.__solution_count

	    def on_solution_callback(self):
	        """Record the score and the text of the solution just found."""
	        self.__solution_count += 1
	        score = self.Value(self.__objective)
	        parts = []
	        for (course, section), chosen in self.__course_vars.items():
	            if not self.Value(chosen):
	                continue
	            parts.append(f'\n{course}  section {section}\n')
	            for days, times in self.__courses[course][section].items():
	                # Right-align the day letters in a 6 character column.
	                parts.append(f'{days.rjust(6)[-6:]} {times}\n')
	        self.__solutions.append((score, ''.join(parts)))

	    def print_all(self):
	        """Print the recorded schedules, lowest score first, up to the limit."""
	        rule = '~' * 40
	        print(rule)
	        print(f'Possible schedules ({self.__solution_count})')
	        ranked = sorted(self.__solutions, key=lambda entry: entry[0])
	        for printed, (score, text) in enumerate(ranked, start=1):
	            print(rule)
	            print(f'Score: {score}\n{text}')
	            if self.__limit and printed >= self.__limit:
	                break



	def generate_schedule(courses, course_groups, preferred_time, limit):
	    """Enumerate and print conflict-free schedules with the CP-SAT solver.

	    Uses the Google ortools constraint solver.  See:
	      https://developers.google.com/optimization/reference/python/index_python
	      http://www.hakank.org/google_or_tools/

	    Args:
	      courses: dict[class][section][dayspec] = timespec, like:
	        {
	           'CSCI 145' : {
	              '44138': { 'MWF': '11:00-11:50 am', 'T': '12:00-01:50 pm'},
	              '41690': { 'MWF': '10:00-10:50 am', 'W': '12:00-01:50 pm'}},

	           'PHYS 161' : {
	              '41569': { 'MWRF': '11:30-12:50 pm', 'T': '04:00-05:50 pm' }},
	        }
	      course_groups: dict mapping a group key to the set of course names in
	        that group.  Exactly one section is chosen per group, taken from any
	        of the group's courses that appear in `courses`.
	      preferred_time: optional time range string used for scoring; when
	        falsy the window 9:30 - 3:15 is used.
	      limit: maximum number of schedules printed (None or 0 prints all).  A
	        limit of exactly 0 additionally asks the solver to minimize the
	        score.

	    Returns:
	      The number of solutions reported by the solver.
	    """
	    # Preferred window in minutes after midnight; default is 9:30 - 3:15.
	    p_time = (570, 915)
	    if preferred_time:
	        p_time = class_time_to_minute_intervals(preferred_time)

	    model = cp_model.CpModel()
	    course_vars = {}        # (course, section) -> boolean "selected" variable
	    interval_vars = {}      # day letter -> optional intervals on that day
	    course_group_vars = {}  # group key -> booleans of all its sections
	    objective = 0

	    for course, sections in courses.items():
	        c = course.lower().replace(' ', '_')

	        section_vars = []
	        for section, times in sections.items():
	            # A boolean variable for each (class, section), indicating the
	            # section that is chosen.
	            var = model.NewBoolVar(f'selected_{c}_{section}')
	            course_vars[(course, section)] = var
	            section_vars.append(var)

	            # An optional interval variable for each (class, section, day)
	            # covering the class time and controlled by the section boolean.
	            for dayspec, timespec in times.items():
	                intervals = class_time_to_minute_intervals(timespec)
	                interval_size = intervals[1] - intervals[0]
	                for d in dayspec:
	                    if d.isspace():
	                        continue
	                    intvar = model.NewOptionalIntervalVar(
	                        intervals[0], interval_size, intervals[1], var,
	                        f'time_{c}_{section}_{d}')
	                    # setdefault accepts any day letter, not only MTWRF.
	                    interval_vars.setdefault(d, []).append(intvar)
	                    # Scoring (lower is better): each class session counts
	                    # as 1; each minute outside the preferred window counts
	                    # as 1, with a minimum of 2 per side.
	                    objective += var
	                    if intervals[0] < p_time[0]:
	                        objective += max(2, p_time[0] - intervals[0]) * var
	                    if intervals[1] > p_time[1]:
	                        objective += max(2, intervals[1] - p_time[1]) * var

	        # Collect this course's sections under every group that lists it.
	        for opt_k, opt_s in course_groups.items():
	            if course in opt_s:
	                course_group_vars.setdefault(opt_k, []).extend(section_vars)

	    # Exactly one section is selected from each group (sum of booleans = 1).
	    for opt_s in course_group_vars.values():
	        model.Add(sum(opt_s) == 1)

	    # A NoOverlap constraint per day over all the optional intervals.
	    for v in interval_vars.values():
	        model.AddNoOverlap(v)

	    if limit == 0:
	        model.Minimize(objective)

	    # Now solve; the printer callback records every reported solution.
	    printer = SchedulePrinter(courses, course_vars, objective, limit)
	    solver = cp_model.CpSolver()
	    solver.parameters.enumerate_all_solutions = True
	    solver.Solve(model, printer)
	    printer.print_all()
	    return printer.solution_count()


	def _table_to_frame(table):
	    """Parse one <table> element into a DataFrame; None if it is unusable."""
	    try:
	        frames = pd.read_html(io.StringIO(str(table)), thousands=None)
	    except Exception:
	        # Deliberate best effort: tables pandas cannot parse are skipped.
	        return None
	    if len(frames) != 1 or frames[0].empty:
	        return None
	    return frames[0]


	def course_list(curr_yr, term, subject):
	    """Download and parse the timetable of one subject.

	    Reads the course list from the web by POSTing to URI
	    (https://web4u.banner.wwu.edu/pls/wwis/wwskcfnd.TimeTable) with the form
	    fields:

	      <select id='term' name='term'>     format <YEAR><TERM>, default 'All'
	      <INPUT TYPE='hidden' NAME='curr_yr' VALUE='2324'>
	      <select id='subj' name='subj'>

	    Like most HTML parsing, this is a hack, as the webpage uses tables for
	    formatting.  Each top-level table is parsed separately with bs4 and
	    converted to a pandas table.  A table with about as many columns as HDRS
	    is treated as a schedule row; any other table is checked for a course
	    heading, which names the course the following rows belong to.

	    Args:
	      curr_yr: academic year code, e.g. '2324'.
	      term: term code, <YEAR><TERM>.
	      subject: department code, e.g. 'CSCI'.

	    Returns:
	      dict mapping 'DEPT NNN' to a DataFrame with integer column labels in
	      HDRS order and string values, one row per listed meeting time.

	    Raises:
	      requests.HTTPError: if the server answers with an error status.
	    """
	    with _get_session() as session:
	        r = session.post(URI, data={
	            'term': term,
	            'curr_yr': curr_yr,
	            'subj': subject,
	        })
	    r.raise_for_status()

	    result = {}
	    soup = BeautifulSoup(r.text, features='lxml')
	    key = ''  # course that the schedule rows currently being read belong to
	    for table in soup.find_all('table'):
	        if table.find_parent('table') is not None:
	            continue  # nested tables are parsed as part of their parent
	        df = _table_to_frame(table)
	        if df is None:
	            continue

	        ncols = len(df.columns)
	        if not (len(HDRS) - 1 <= ncols <= len(HDRS)):
	            # This 'table' is likely to be merely a formatting construct in
	            # the webpage.  It may be a class header row, which includes the
	            # class name, description, etc.; detect this with COURSE_RE.
	            heading = str(df[0][0]).strip()
	            m = COURSE_RE.match(heading)
	            if m and heading.startswith(subject):
	                key = f'{m.group(1).strip()} {m.group(2)}'
	            else:
	                key = ''
	            continue

	        # This 'table' is likely a row in the course schedule (the site puts
	        # each row in a separate table element).  Lab/additional sections
	        # may have fewer html columns.
	        if df[0][0] == HDRS[0]:
	            continue  # header row
	        if not key:
	            continue  # schedule row outside any recognized course

	        # Convert float -> empty string values.
	        for x in range(ncols):
	            df[x] = df[x].apply(_to_str)

	        if key not in result:
	            result[key] = df
	            continue

	        existing = result[key]
	        if ncols < len(HDRS):
	            # This is something like a lab section, or alternate meeting
	            # time on other days.  Fill in the remaining columns with empty
	            # string rather than NaN.
	            while len(df.columns) < len(HDRS):
	                df[len(df.columns)] = ''

	            # Copy the course number (1) and availability (7, 8, 9) from the
	            # previous row of this course.
	            previous = existing.iloc[-1]
	            for col in (1, 7, 8, 9):
	                df[col] = previous[col]

	        result[key] = pd.concat([existing, df], ignore_index=True)
	    return result


	def filter_to_requested(courses, requested_classes, missing):
	    """Pick the requested courses out of the downloaded course tables.

	    Args:
	      courses: dict mapping course name to its schedule table.
	      requested_classes: iterable of course names to look up.
	      missing: list that receives, in order, each requested name that is not
	        a key of `courses`.

	    Returns:
	      A new dict holding the `courses` entry of every requested name found.
	    """
	    found = {}
	    for name in requested_classes:
	        if name in courses:
	            found[name] = courses[name]
	        else:
	            missing.append(name)
	    return found


	def filter_to_available(courses, include, exclude, missing):
	    """Reduce each course table to the sections that can be scheduled.

	    For every course, the rows whose Days column is 'TBD' and the rows of
	    excluded sections are dropped.  Of what remains, only sections with open
	    seats (Avail column greater than zero) are kept, unless the section is
	    listed in `include` or `include` contains 'all'.

	    Args:
	      courses: dict mapping course name to a DataFrame with integer column
	        labels in HDRS order, holding string values.
	      include: list of section numbers (Crn strings) to keep even when they
	        have no space; the special value 'all' keeps every remaining section.
	      exclude: list of section numbers (Crn strings) to drop.
	      missing: list that receives the name of every course left without a
	        usable section.

	    Returns:
	      dict mapping course name to its filtered DataFrame; courses without a
	      usable section are omitted.
	    """
	    available = {}
	    keep_all = 'all' in include
	    for x, df in courses.items():
	        unique = df[1].unique()

	        # Remove 'TBD' meeting rows, then the excluded sections.
	        df = df[df[2] != 'TBD']
	        df = df[~df[1].isin(list(exclude))]
	        if df.empty:
	            # Report the course rather than dropping it silently.
	            missing.append(x)
	            continue

	        if keep_all:
	            available[x] = df
	            continue

	        # Filter by available slots.  The columns hold strings, so convert
	        # before comparing; blank or non-numeric counts become NaN and are
	        # treated as full.
	        open_seats = pd.to_numeric(df[9], errors='coerce')
	        z = df[open_seats > 0]
	        # Re-add explicitly included sections that the filter removed.
	        for i in include:
	            if i in unique and i not in z[1].unique():
	                z = pd.concat([z, df[df[1] == i]], ignore_index=True)
	        if z.empty:
	            missing.append(x)
	            continue
	        available[x] = z
	    return available


	def run_class_scheduler(args, course_groups, required):
	    """Fetch the requested courses and print the schedules that fit.

	    Args:
	      args: parsed command line options; reads `year`, `term`, `include`,
	        `exclude`, `preferred_time` and `limit`.
	      course_groups: dict mapping a group key to the set of course names of
	        which one is to be scheduled.
	      required: collection of course names that must be available; if any of
	        them has no available section, nothing is scheduled.
	    """
	    rule = '-' * 40
	    print(rule)
	    print(f'Academic year {args.year} quarter {args.term}')
	    for it in course_groups.values():
	        print('One course from : ' + ' '.join(sorted(it)))
	    print(rule)

	    requested_classes = []
	    for group in course_groups.values():
	        requested_classes.extend(group)

	    # Reads the courses from WWU for the requested_classes and term, one
	    # request per department code.
	    subjects = set()
	    for x in requested_classes:
	        m = COURSE_RE.match(x)
	        if m:
	            subjects.add(m.group(1))

	    courses = {}
	    for x in sorted(subjects):
	        c = course_list(args.year, args.term, x)
	        print(f'Course list for {x} has {len(c)} courses')
	        courses.update(c)

	    missing = []
	    courses = filter_to_requested(courses, requested_classes, missing)
	    for c, df in courses.items():
	        print(rule)
	        print(c)
	        print(df)

	    available = filter_to_available(courses, args.include, args.exclude, missing)
	    if missing:
	        print(rule)
	        print(
	            f'Courses {",".join(missing)} have no available sections; schedule may be incomplete')
	        # If any of the "required" classes are unavailable, exit early.
	        if any(x in required for x in missing):
	            return

	    # Transform the dataframe to the dict used by generate_schedule:
	    # transformed[course][section][days] = time, from columns 2 and 3.
	    transformed = {}
	    for k, df in available.items():
	        for section in df[1].unique():
	            slots = transformed.setdefault(k, {}).setdefault(section, {})
	            for _, row in df[df[1] == section].iterrows():
	                week_spec = str(row[2]).strip()
	                timespec = str(row[3]).strip()
	                if not week_spec or not timespec:
	                    # No meeting time listed in this row; there is nothing
	                    # to place on the calendar (and nothing to parse).
	                    continue
	                slots[week_spec] = timespec

	    schedules = generate_schedule(transformed, course_groups, args.preferred_time, args.limit)

	    print('~'*40)
	    print( f'Generated {schedules} possible schedules for selected courses.')
	    print()


	def _default_year_and_term(today):
	    """Return the default (academic year code, term code) for date `today`."""
	    # Determine the academic year for the scheduler, which is e.g. 2324.
	    # A new academic year is assumed from May 1.
	    academic_year = today.year
	    if today.month < 5:
	        academic_year -= 1
	    default_year = '%02d%02d' % (academic_year % 100, (academic_year + 1) % 100)

	    # Determine the term (quarter) for the scheduler.
	    # Term is composed of 'YYYY' + Suffix:
	    # May 1: Beginning of registration for FALL: 40
	    # Nov 1: Beginning of registration for WINTER: 10
	    # Feb 15: Beginning of registration for SPRING: 20
	    # Summer (30) is never auto-selected.
	    if today >= dt.date(year=academic_year + 1, month=2, day=15):
	        default_term = '%04d20' % (academic_year + 1,)
	    elif today >= dt.date(year=academic_year, month=11, day=1):
	        default_term = '%04d10' % (academic_year + 1,)
	    else:
	        default_term = '%04d40' % (academic_year,)
	    return default_year, default_term


	def main(argv):
	    """Parse the command line and run the scheduler.

	    Options known to argparse are parsed first.  Every remaining argument is
	    either a course name (underscores stand for spaces) or a single-dash
	    group flag such as -a or -b.  Courses given before any flag are required
	    and each forms a group of its own; courses following a flag share one
	    group, of which one course is scheduled.

	    Args:
	      argv: the full argument vector, program name first.

	    Returns:
	      0 after running the scheduler, 1 if the courses were specified
	      incorrectly.
	    """
	    default_year, default_term = _default_year_and_term(dt.date.today())

	    parser = argparse.ArgumentParser(
	        prog=argv[0], description='Attempt to generate WWU schedule.')
	    parser.add_argument('-t', '--term', type=str, nargs='?', default=default_term,
	                        help="YYYYTT, where YYYY is the year, and TT is the term (Winter=10, Fall=40).")
	    parser.add_argument('-y', '--year', type=str, nargs='?', default=default_year,
	                        help="School year. For example, --year=2324.")
	    parser.add_argument('-i', '--include', action='append', default=[],
	                        help="Include these sections (course numbers) even they have no space.")
	    parser.add_argument('-e', '--exclude', action='append', default=[],
	                        help="Exclude these sections (course numbers).")
	    parser.add_argument('--preferred-time', type=str, default=None,
	                        help="Prefer classes which fall within this time range.  Example: '10:00-3:30 pm'")
	    parser.add_argument('--limit', default=None, type=int,
	                        help="Print the N best scores.")

	    args, unknown = parser.parse_known_args(argv[1:])
	    failed = False

	    course_groups = {}
	    required = set()
	    mode = '-required'
	    groupnum = 0
	    for x in unknown:
	        if x.startswith('-'):
	            x = x.lower()
	            # A long option typed with a single dash is not recognized by
	            # argparse and ends up here.
	            if x in ('-limit', '-term', '-year', '-include', '-exclude', '-preferred-time'):
	                print(f'Argument should use --; -{x}', file=sys.stderr)
	                failed = True
	            # A different flag starts a new group; repeating the same flag
	            # keeps adding to the current one.
	            if x != mode and x != '-oneof':
	                groupnum += 1
	            mode = x
	            continue
	        course = x.upper().replace('_', ' ')
	        if mode.startswith('-r'):
	            # Every required course gets a group of its own.
	            groupnum += 1
	            required.add(course)
	        course_groups.setdefault(f'{mode}-{groupnum}', set()).add(course)

	    if failed or not course_groups:
	        print('Error specifying courses.', file=sys.stderr)
	        print('Example: ', file=sys.stderr)
	        print(f'  python3 {argv[0]} ENG_101 PHYS_161 HIST_112', file=sys.stderr)
	        print('', file=sys.stderr)
	        return 1

	    run_class_scheduler(args, course_groups, required)
	    return 0


	if __name__ == '__main__':
	    sys.exit(main(sys.argv))