THargreaves/scheduler.py

## scheduler.py
#!/usr/bin/env python

"""schedule.py: Meeting scheduler based on Ford–Fulkerson algorithm.

A tool to generate or prove the lack of existance of a schedule for one-on-one
meetings given the availabilities of attendees for different time slots.

The solution is based on a modification of the Ford-Fulkerson algorithm in
which edge capacities are binary. By converting the the availability matrix
into a bipartite graph with edges of capacity one and introducing source and
sink nodes connecting to each half (also with capacitity one), we can see that
a valid schedule is equivalent to obtaining a maximal flow each to the number
of attendees.

When multiple solutions exist, the greedy nature of the algorithm will ensure
that the times generated are the earliest possible. By reversing the BFS order,
we could instead show a preference for later times.
"""

__author__  = "Tim Hargreaves"
__date__    = "2021-02-24"
__license__ = "MIT"
__version__ = "0.1"

import datetime as dt
from queue import Queue
import os

import numpy as np
import pandas as pd


#### SETUP ####

# Check availability spreadsheet exists
if not os.path.exists('availability_template.xlsx'):
    raise OSError("Could not found `availability_template.xlsx` in working "
                  "directory.")

# Load raw availability dataframe
raw = pd.read_excel('availability_template.xlsx', header=(0, 1), index_col=0)
raw = raw.fillna(1).replace('x', 0).astype('bool')
raw = raw[raw.index.notnull()]

# Process attendee names, possible times, and availibility
names = raw.index.values
times = np.vectorize(
    lambda t: t[0] + " " + t[1].strftime('%H:%M')
)(raw.columns.values)
availability = raw.values

# Convert availibility into adjacency matrix of corresponding graph
M, N = availability.shape
K = M + N + 2  # adjency matrix is K x K
adj = np.concatenate((
    # Source
    np.array([[False] + [True] * M + [False] * (N + 1)]),
    # Bipartite graph
    np.concatenate((
        # Attendees
        np.concatenate((
            np.full((M, M+1), False),
            availability,
            np.full((M, 1), False)
        ), axis=1),
        # Meetings
        np.concatenate((
            np.full((N, M+N+1), False),
            np.full((N, 1), True)
        ), axis=1),
    ), axis=0),
    # Sink
    np.full((1, N+M+2), False),
), axis=0)

#### SOLVER ####

# Breadth-first search helper
def find_flow(cap, s, t):
    """
    Use breadth-first search to find an augmenting flow between two nodes.

    Args:
        adj (ndarray): Capacity
        s (int): Index of source node
        t (int): Index of target node

    Returns:
        path (bool): Whether there is a flow between the two nodes
        parents (dict): A dictionary of node parents in flow
    """

    # Mark only source node as visited and add to queue
    visited = np.arange(K) == s
    queue = Queue()
    queue.put(s)

    # Keep track of parents
    parent = {}

    # Complete BFS
    while not queue.empty():

        u = queue.get()

        # Loop through all neighbours of u
        for v in np.flatnonzero(cap[u, :] > 0):
            if not visited[v]:
                queue.put(v)
                visited[v] = True
                parent[v] = u

    return visited[t], parent

## Ford-Fulkerson algorithm
# Keep track of node parents
parents = np.empty((K,), dtype=np.uint)

# Intitially no flow
max_flow = 0

# Convert adjecency matrix to residual capacity matrix
cap = adj.astype(np.int)

# Augment flow if there is a path from source to sink
while True:

    # Check for flow
    flow, parent = find_flow(cap, 0, K-1)
    if not flow: break
    for c, p in parent.items(): parents[c] = p

    # Increment flow
    max_flow += 1

    # Update residual capacities and reverse edges along flow
    v = K - 1
    while v != 0:
        u = parents[v]
        cap[u, v] -= 1
        cap[v, u] += 1
        v = parents[v]

### OUTPUT ###

if max_flow == M:
    print("Perfect solution found:")
    print(pd.DataFrame({
        'name': names,
        'time': times[parents[1:(M + 1)] - (M + 1)]
    }))
else:
    print("No perfect solution exists.")
	#!/usr/bin/env python

	"""schedule.py: Meeting scheduler based on Ford–Fulkerson algorithm.

	A tool to generate or prove the lack of existance of a schedule for one-on-one
	meetings given the availabilities of attendees for different time slots.

	The solution is based on a modification of the Ford-Fulkerson algorithm in
	which edge capacities are binary. By converting the the availability matrix
	into a bipartite graph with edges of capacity one and introducing source and
	sink nodes connecting to each half (also with capacitity one), we can see that
	a valid schedule is equivalent to obtaining a maximal flow each to the number
	of attendees.

	When multiple solutions exist, the greedy nature of the algorithm will ensure
	that the times generated are the earliest possible. By reversing the BFS order,
	we could instead show a preference for later times.
	"""

	__author__ = "Tim Hargreaves"
	__date__ = "2021-02-24"
	__license__ = "MIT"
	__version__ = "0.1"

	import datetime as dt
	from queue import Queue
	import os

	import numpy as np
	import pandas as pd


	#### SETUP ####

	# Check availability spreadsheet exists
	if not os.path.exists('availability_template.xlsx'):
	raise OSError("Could not found `availability_template.xlsx` in working "
	"directory.")

	# Load raw availability dataframe
	raw = pd.read_excel('availability_template.xlsx', header=(0, 1), index_col=0)
	raw = raw.fillna(1).replace('x', 0).astype('bool')
	raw = raw[raw.index.notnull()]

	# Process attendee names, possible times, and availibility
	names = raw.index.values
	times = np.vectorize(
	lambda t: t[0] + " " + t[1].strftime('%H:%M')
	)(raw.columns.values)
	availability = raw.values

	# Convert availibility into adjacency matrix of corresponding graph
	M, N = availability.shape
	K = M + N + 2 # adjency matrix is K x K
	adj = np.concatenate((
	# Source
	np.array([[False] + [True] * M + [False] * (N + 1)]),
	# Bipartite graph
	np.concatenate((
	# Attendees
	np.concatenate((
	np.full((M, M+1), False),
	availability,
	np.full((M, 1), False)
	), axis=1),
	# Meetings
	np.concatenate((
	np.full((N, M+N+1), False),
	np.full((N, 1), True)
	), axis=1),
	), axis=0),
	# Sink
	np.full((1, N+M+2), False),
	), axis=0)

	#### SOLVER ####

	# Breadth-first search helper
	def find_flow(cap, s, t):
	"""
	Use breadth-first search to find an augmenting flow between two nodes.

	Args:
	adj (ndarray): Capacity
	s (int): Index of source node
	t (int): Index of target node

	Returns:
	path (bool): Whether there is a flow between the two nodes
	parents (dict): A dictionary of node parents in flow
	"""

	# Mark only source node as visited and add to queue
	visited = np.arange(K) == s
	queue = Queue()
	queue.put(s)

	# Keep track of parents
	parent = {}

	# Complete BFS
	while not queue.empty():

	u = queue.get()

	# Loop through all neighbours of u
	for v in np.flatnonzero(cap[u, :] > 0):
	if not visited[v]:
	queue.put(v)
	visited[v] = True
	parent[v] = u

	return visited[t], parent

	## Ford-Fulkerson algorithm
	# Keep track of node parents
	parents = np.empty((K,), dtype=np.uint)

	# Intitially no flow
	max_flow = 0

	# Convert adjecency matrix to residual capacity matrix
	cap = adj.astype(np.int)

	# Augment flow if there is a path from source to sink
	while True:

	# Check for flow
	flow, parent = find_flow(cap, 0, K-1)
	if not flow: break
	for c, p in parent.items(): parents[c] = p

	# Increment flow
	max_flow += 1

	# Update residual capacities and reverse edges along flow
	v = K - 1
	while v != 0:
	u = parents[v]
	cap[u, v] -= 1
	cap[v, u] += 1
	v = parents[v]

	### OUTPUT ###

	if max_flow == M:
	print("Perfect solution found:")
	print(pd.DataFrame({
	'name': names,
	'time': times[parents[1:(M + 1)] - (M + 1)]
	}))
	else:
	print("No perfect solution exists.")