gigamonkey/find-times.py

## find-times.py
#!/usr/bin/env python

"""
Find times to hold the CSA Zoom meeting based on when people said they could do it.
Given a number N, find the N times that maximize the number of people who can come.
"""

from datetime import datetime
from functools import reduce
import json
import re
import sys
from collections import defaultdict


def times_for_person(times, datum, year=2024):
    """Generate the formatted time slots one respondent said they can attend.

    Args:
        times: Mapping of spreadsheet column name to the hour of day that
            column stands for (the shape ``extract_times`` returns).
        datum: One spreadsheet row as a mapping of column name to cell text.
            Each time column's cell is a ``", "``-separated list of entries
            such as ``"Mon 6/10"`` (a word, then month/day).
        year: Calendar year attached to every date, since the cells carry
            only a month and a day. Defaults to 2024.

    Yields:
        One string per available slot, formatted with
        ``"%Y-%m-%d %H:%M (%I:%M %p)"``.

    Raises:
        ValueError: If a non-empty entry contains no ``<word> <m>/<d>`` date,
            or if ``datetime`` rejects the month/day/hour combination.
    """
    date_pattern = re.compile(r"(\w+) (\d+)/(\d+)")
    for col, hour in times.items():
        for date in datum[col].split(", "):
            if not date:
                # An empty cell splits to [""]: the person picked no dates.
                continue
            m = date_pattern.search(date)
            if m is None:
                # Must be a real exception object; raising a bare string is
                # itself a TypeError in Python 3 and hides the message.
                raise ValueError(f"Bad date: {date}")
            month, day = int(m.group(2)), int(m.group(3))
            yield datetime(year, month, day, hour).strftime(
                "%Y-%m-%d %H:%M (%I:%M %p)"
            )


def extract_times(header):
    """Map each time-slot column in the header to its hour on a 24-hour clock.

    A column counts as a time slot when its title contains a clock time such
    as ``9am`` or ``4pm``; every other column is left out of the result.

    Args:
        header: Iterable of column-title strings.

    Returns:
        Dict of column title -> hour in the range 0-23.
    """
    times = {}
    for h in header:
        if m := re.search(r"(\d+)(am|pm)", h):
            # ``% 12`` folds both 12 o'clock cases onto 0 first, so 12am -> 0
            # and 12pm -> 12 without special-casing noon.
            hour = int(m.group(1)) % 12
            if m.group(2) == "pm":
                hour += 12
            times[h] = hour
    return times


def get_data(filename):
    """Load the tab-separated responses file and index it by time and person.

    The first non-blank line is the header row; each later non-blank line is
    one response, turned into a dict keyed by the header titles.

    Args:
        filename: Path of the tab-separated file to read.

    Returns:
        Whatever ``parse_spreadsheet`` returns for the header and rows: the
        ``(times_to_people, people_to_times)`` pair.

    Raises:
        ValueError: If the file contains no header line at all.
    """
    with open(filename) as f:
        # Strip only the line terminator. Slicing off the last character
        # would eat real data when the final line has no trailing newline.
        rows = [
            line.rstrip("\n").split("\t") for line in f if line.rstrip("\n")
        ]

    if not rows:
        raise ValueError(f"No header row found in {filename}")

    header, *responses = rows
    data = [dict(zip(header, row)) for row in responses]
    return parse_spreadsheet(header, data)


def parse_spreadsheet(header, data):
    """Index the responses both ways: time -> people and person -> times.

    Args:
        header: Column titles; the time-slot columns are picked out of it
            with ``extract_times``.
        data: Iterable of row dicts. Each row must have an
            ``"Email Address"`` entry, which identifies the person.

    Returns:
        ``(times_to_people, people_to_times)``, two ``defaultdict(set)``
        objects. A person with no available time appears in neither.
    """
    hour_by_column = extract_times(header)
    people_to_times = defaultdict(set)
    times_to_people = defaultdict(set)

    for row in data:
        who = row["Email Address"]
        for slot in times_for_person(hour_by_column, row):
            people_to_times[who].add(slot)
            times_to_people[slot].add(who)

    return times_to_people, people_to_times


def best(n, times_to_people, scheduled=None):
    """Find the best n times so the most people can attend at least one session.

    Every candidate time is either taken or skipped; the search (memoised on
    its state) keeps whichever choice ends up covering more people.

    Args:
        n: Maximum total number of times in the answer, counting the ones in
            ``scheduled``.
        times_to_people: Mapping of time -> set of people who can attend it.
            It is only read, never modified.
        scheduled: Optional iterable of times that are already fixed; they
            are always part of the answer.

    Returns:
        A ``(times, uncovered)`` pair of sets: the chosen times (including
        ``scheduled``) and the people who cannot attend any of them.
    """
    if scheduled is None:
        scheduled = []
    people = set().union(*times_to_people.values())

    def attendees(t):
        # .get() rather than [] so a scheduled time nobody signed up for is
        # not inserted into a defaultdict as a side effect of the lookup.
        return times_to_people.get(t, frozenset())

    fixed = frozenset(scheduled)
    candidates = [t for t in times_to_people if t not in fixed]
    memo = {}

    def best_helper(i, used, covered):
        """Return (extra_times, final_covered) for the best picks from candidates[i:].

        ``used`` is how many slots are already taken and ``covered`` is the
        frozenset of people those slots reach.
        """
        if covered == people or i == len(candidates) or used >= n:
            return frozenset(), covered
        # The slot count has to be part of the key: two states with the same
        # remaining times and coverage but different budgets left do not
        # share a best continuation.
        key = (i, used, covered)
        if key not in memo:
            t = candidates[i]
            picked, reach = best_helper(
                i + 1, used + 1, covered.union(attendees(t))
            )
            take = (picked | {t}, reach)
            skip = best_helper(i + 1, used, covered)
            # On equal coverage prefer taking the earlier time.
            memo[key] = take if len(take[1]) >= len(skip[1]) else skip
        return memo[key]

    start_covered = frozenset().union(*(attendees(t) for t in fixed))
    extra, covered = best_helper(0, len(fixed), start_covered)
    return set(fixed | extra), people - covered


def show_times(times, times_to_people, people_to_times):
    """Print each chosen time, in sorted order, with its attendee list.

    For every time a blank line is printed, then ``<time>: <count>``, then
    one indented line per attendee in sorted order.

    Args:
        times: Iterable of time strings to report on.
        times_to_people: Mapping of time -> collection of people who can
            attend it.
        people_to_times: Accepted for the callers' convenience; not used.
    """
    for slot in sorted(times):
        headcount = len(times_to_people[slot])
        print(f"\n{slot}: {headcount}")
        for who in sorted(times_to_people[slot]):
            print(f"  {who}")


def show_uncovered(uncovered, people_to_times=None):
    """Print the people no chosen time covers, with how many slots each offered.

    Args:
        uncovered: Collection of people to list; printed in sorted order.
        people_to_times: Mapping of person -> collection of the times they
            can attend. When omitted, the module-level ``people_to_times`` is
            used, which is what the one-argument call relied on implicitly.
    """
    if people_to_times is None:
        # Keep one-argument callers working: fall back to the script's global
        # (the parameter shadows that name, hence the explicit lookup).
        people_to_times = globals()["people_to_times"]

    print(f"\nUncovered: {len(uncovered)}")
    for p in sorted(uncovered):
        # .get() so an unknown person reports 0 rather than adding a key.
        print(f"  {p} {len(people_to_times.get(p, ()))}")


# Script entry point. The names stay at module scope (not inside a function)
# because show_uncovered reads people_to_times as a module global.
if __name__ == "__main__":
    if len(sys.argv) < 3:
        # Fail with a usage line instead of a bare IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} RESPONSES.tsv N")

    filename = sys.argv[1]
    n = int(sys.argv[2])

    # Time strings of any already scheduled meetings
    scheduled = ["2024-06-10 16:00 (04:00 PM)"]

    times_to_people, people_to_times = get_data(filename)
    times, uncovered = best(n, times_to_people, scheduled)

    show_times(times, times_to_people, people_to_times)
    show_uncovered(uncovered)
	#!/usr/bin/env python

	"""
	Find times to hold the CSA Zoom meeting based on when people said they could do it.
	Given a number N, find the N times that maximize the number of people who can come.
	"""

	from datetime import datetime
	from functools import reduce
	import json
	import re
	import sys
	from collections import defaultdict


	def times_for_person(times, datum, year=2024):
	    """Generate the formatted time slots one respondent said they can attend.

	    Args:
	        times: Mapping of spreadsheet column name to the hour of day that
	            column stands for.
	        datum: One spreadsheet row as a mapping of column name to cell
	            text. Each time column's cell is a ``", "``-separated list of
	            entries such as ``"Mon 6/10"`` (a word, then month/day).
	        year: Calendar year attached to every date, since the cells carry
	            only a month and a day. Defaults to 2024.

	    Yields:
	        One string per available slot, formatted with
	        ``"%Y-%m-%d %H:%M (%I:%M %p)"``.

	    Raises:
	        ValueError: If a non-empty entry contains no ``<word> <m>/<d>``
	            date, or if ``datetime`` rejects the month/day/hour.
	    """
	    date_pattern = re.compile(r"(\w+) (\d+)/(\d+)")
	    for col, hour in times.items():
	        for date in datum[col].split(", "):
	            if not date:
	                # An empty cell splits to [""]: no dates were picked.
	                continue
	            m = date_pattern.search(date)
	            if m is None:
	                # Must be a real exception object; raising a bare string
	                # is itself a TypeError in Python 3.
	                raise ValueError(f"Bad date: {date}")
	            month, day = int(m.group(2)), int(m.group(3))
	            yield datetime(year, month, day, hour).strftime(
	                "%Y-%m-%d %H:%M (%I:%M %p)"
	            )


	def extract_times(header):
	    """Map each time-slot column in the header to its hour on a 24-hour clock.

	    A column counts as a time slot when its title contains a clock time
	    such as ``9am`` or ``4pm``; every other column is left out.

	    Args:
	        header: Iterable of column-title strings.

	    Returns:
	        Dict of column title -> hour in the range 0-23.
	    """
	    times = {}
	    for h in header:
	        # The alternation must be a bare ``|``; an escaped ``\|`` would
	        # only match the literal text "am|pm".
	        if m := re.search(r"(\d+)(am|pm)", h):
	            # ``% 12`` folds both 12 o'clock cases onto 0 first, so
	            # 12am -> 0 and 12pm -> 12 without special-casing noon.
	            hour = int(m.group(1)) % 12
	            if m.group(2) == "pm":
	                hour += 12
	            times[h] = hour
	    return times


	def get_data(filename):
	    """Load the tab-separated responses file and index it by time and person.

	    The first non-blank line is the header row; each later non-blank line
	    is one response, turned into a dict keyed by the header titles.

	    Args:
	        filename: Path of the tab-separated file to read.

	    Returns:
	        Whatever ``parse_spreadsheet`` returns for the header and rows.

	    Raises:
	        ValueError: If the file contains no header line at all.
	    """
	    with open(filename) as f:
	        # Strip only the line terminator. Slicing off the last character
	        # would eat real data when the final line has no newline.
	        rows = [
	            line.rstrip("\n").split("\t") for line in f if line.rstrip("\n")
	        ]

	    if not rows:
	        raise ValueError(f"No header row found in {filename}")

	    header, *responses = rows
	    data = [dict(zip(header, row)) for row in responses]
	    return parse_spreadsheet(header, data)


	def parse_spreadsheet(header, data):
	    """Index the responses both ways: time -> people and person -> times.

	    Args:
	        header: Column titles; the time-slot columns are picked out of it
	            with ``extract_times``.
	        data: Iterable of row dicts. Each row must have an
	            ``"Email Address"`` entry, which identifies the person.

	    Returns:
	        ``(times_to_people, people_to_times)``, two ``defaultdict(set)``
	        objects. A person with no available time appears in neither.
	    """
	    times = extract_times(header)
	    times_to_people = defaultdict(set)
	    people_to_times = defaultdict(set)
	    for datum in data:
	        email = datum["Email Address"]
	        for t in times_for_person(times, datum):
	            times_to_people[t].add(email)
	            people_to_times[email].add(t)
	    return times_to_people, people_to_times


	def best(n, times_to_people, scheduled=None):
	    """Find the best n times so the most people can attend at least one session.

	    Every candidate time is either taken or skipped; the search (memoised
	    on its state) keeps whichever choice ends up covering more people.

	    Args:
	        n: Maximum total number of times in the answer, counting the ones
	            in ``scheduled``.
	        times_to_people: Mapping of time -> set of people who can attend
	            it. It is only read, never modified.
	        scheduled: Optional iterable of times that are already fixed; they
	            are always part of the answer.

	    Returns:
	        A ``(times, uncovered)`` pair of sets: the chosen times (including
	        ``scheduled``) and the people who cannot attend any of them.
	    """
	    if scheduled is None:
	        scheduled = []
	    people = set().union(*times_to_people.values())

	    def attendees(t):
	        # .get() rather than [] so a scheduled time nobody signed up for
	        # is not inserted into a defaultdict by the lookup.
	        return times_to_people.get(t, frozenset())

	    fixed = frozenset(scheduled)
	    candidates = [t for t in times_to_people if t not in fixed]
	    memo = {}

	    def best_helper(i, used, covered):
	        """Return (extra_times, final_covered) for the best picks from candidates[i:]."""
	        if covered == people or i == len(candidates) or used >= n:
	            return frozenset(), covered
	        # The slot count has to be part of the key: states with the same
	        # remaining times and coverage but different budgets left do not
	        # share a best continuation.
	        key = (i, used, covered)
	        if key not in memo:
	            t = candidates[i]
	            picked, reach = best_helper(
	                i + 1, used + 1, covered.union(attendees(t))
	            )
	            take = (picked | {t}, reach)
	            skip = best_helper(i + 1, used, covered)
	            # On equal coverage prefer taking the earlier time.
	            memo[key] = take if len(take[1]) >= len(skip[1]) else skip
	        return memo[key]

	    start_covered = frozenset().union(*(attendees(t) for t in fixed))
	    extra, covered = best_helper(0, len(fixed), start_covered)
	    return set(fixed | extra), people - covered


	def show_times(times, times_to_people, people_to_times):
	    """Print each chosen time, in sorted order, with its attendee list.

	    For every time a blank line is printed, then ``<time>: <count>``, then
	    one line per attendee, indented two spaces, in sorted order.

	    Args:
	        times: Iterable of time strings to report on.
	        times_to_people: Mapping of time -> collection of people who can
	            attend it.
	        people_to_times: Accepted for the callers' convenience; not used.
	    """
	    for t in sorted(times):
	        print(f"\n{t}: {len(times_to_people[t])}")
	        for p in sorted(times_to_people[t]):
	            print(f"  {p}")


	def show_uncovered(uncovered, people_to_times=None):
	    """Print the people no chosen time covers, with how many slots each offered.

	    Args:
	        uncovered: Collection of people to list; printed in sorted order.
	        people_to_times: Mapping of person -> collection of the times they
	            can attend. When omitted, the module-level ``people_to_times``
	            is used, which the one-argument call relied on implicitly.
	    """
	    if people_to_times is None:
	        # Keep one-argument callers working: fall back to the script's
	        # global (the parameter shadows that name).
	        people_to_times = globals()["people_to_times"]

	    print(f"\nUncovered: {len(uncovered)}")
	    for p in sorted(uncovered):
	        # .get() so an unknown person reports 0 rather than adding a key.
	        print(f"  {p} {len(people_to_times.get(p, ()))}")


	# Script entry point. The names stay at module scope (not inside a
	# function) because show_uncovered reads people_to_times as a global.
	if __name__ == "__main__":
	    if len(sys.argv) < 3:
	        # Fail with a usage line instead of a bare IndexError traceback.
	        sys.exit(f"usage: {sys.argv[0]} RESPONSES.tsv N")

	    filename = sys.argv[1]
	    n = int(sys.argv[2])

	    # Time strings of any already scheduled meetings
	    scheduled = ["2024-06-10 16:00 (04:00 PM)"]

	    times_to_people, people_to_times = get_data(filename)
	    times, uncovered = best(n, times_to_people, scheduled)

	    show_times(times, times_to_people, people_to_times)
	    show_uncovered(uncovered)