Skip to content

Instantly share code, notes, and snippets.

@maweigert
Last active March 20, 2020 19:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save maweigert/e33f7dd1ec1b1156816ccc6c8abc80cd to your computer and use it in GitHub Desktop.
Save maweigert/e33f7dd1ec1b1156816ccc6c8abc80cd to your computer and use it in GitHub Desktop.
""" Selection problem
pip install pulp termcolor
To extend and adopt possible constraints change the following:
1) update LANGUAGES to cover all appearing languages
e.g. add
"AR":["Arabic","Arab"]
if it might appear in the csv
2) update setup_constraints with your custom predicates and constraint numbers
A predicate is simply a
function(x) -> (True,False)
that takes a dataframe record x as an input and outputs whether the predicate is True for x, e.g.
p_age = Predicate(lambda x: x.Age>30)
A constraint is a tuple of (predicate, MinMax(min,max) ) where min (max) is the minimal (maximal) number of records to fullfill the predicate, e.g.
constraints = (
( Predicate(lambda x: x.Age>30), MinMax(3,8) ),
...
)
"""
import numpy as np
import pandas as pd
from collections import namedtuple
import pulp
from functools import reduce
from termcolor import colored, cprint
# Definition of languages and potential synonyms
LANGUAGES={
"DE":["German", "Deutsch"],
"PE":["Persian"],
"HI":["Hindi"],
"RU":["Russian","RUS"],
"IT":["Italian"],
"SP":["Spanish","ESP"],
"FR":["French"],
"EN":["English"],
}
def cleanup(df):
for k, v in LANGUAGES.items():
for val in v:
df.Languages.replace({f"(^|[^a-zA-Z])({val}?)($|[^a-zA-Z])":f"\\1{k}\\3"}, regex =True, inplace = True)
df = df.rename(columns=dict((k,k.strip().replace(" ","_")) for k in df.columns))
return df
def _and(x,y):
return x and y
def _or(x,y):
return x or y
def contains(s,elements, reduce_func=_and):
"""helper function, checks for all strings in <elements> if they are in s
and returns depending on reduce_func e.g. True if for all/one/some this is True"""
if isinstance(elements, str):
elements = (elements,)
return reduce(reduce_func,(el.lower() in str(s).lower() for el in elements))
class Predicate(object):
def __init__(self, func, name = "predicate"):
self.func = func
self.name = name
def __call__(self, x):
return self.func(x)
def __and__(self, other):
return Predicate(lambda x: self(x) and other(x), name = f"{self} AND {other}")
def __or__(self, other):
return Predicate(lambda x: self(x) or other(x), name = f"{self} OR {other}")
def __invert__(self):
return Predicate(lambda x: not self(x), name = f"NOT {self}")
def __repr__(self):
return f"{self.name}"
MinMax = namedtuple("MinMax",["min","max"])
def setup_constraints(df):
# PREDICATES DEFINITION ============
# Predicate(func, name)
# func: function accepting a Dataframe record and returning True or False, e.g.
# p = Predicate(lambda x: x.Age>30)
# predicates can also be logically combined (with &, |, and ~), e.g
# p1 = Predicate(lambda x: x.Age>30)
# p2 = Predicate(lambda x: contains(x.Languages,"EN")
# p3 = p1 & p2
n_person = len(df)
p_driver = Predicate(
lambda x: contains(x.Drivers_License,("Stick","Automatic"),_or),
name = "driver"
)
p_driver_stick = Predicate(lambda x: contains(x.Drivers_License,"Stick"),
name="driver with stick")
p_langs = {
k:Predicate(lambda x, k=k: contains(x.Languages,k), name = f"lang_{k}") for k in LANGUAGES.keys()
}
p_elderly = Predicate(lambda x: x.Age>=60, name = "older")
p_driver_EN = p_driver & p_langs["EN"]
# CONSTRAINT DEFINITION ============
# a list of tuples (predicate, MinMax(min_number, max_number) )
constraints = (
(p_driver, MinMax(10,n_person)),
(p_driver_stick, MinMax(7,n_person)),
(p_driver & p_langs["EN"], MinMax(2,n_person)),
(p_langs["DE"] & p_langs["RU"] , MinMax(1,n_person)),
(p_langs["DE"] & p_langs["PE"] , MinMax(1,n_person)),
(p_langs["DE"] & p_langs["SP"] , MinMax(1,n_person)),
(p_langs["DE"] & p_langs["FR"] , MinMax(1,n_person)),
(p_elderly , MinMax(0,5)),
)
return constraints
def solve_model(df, constraints):
# ILP MODEL ============
n_person = len(df)
model = pulp.LpProblem("Matching", pulp.LpMinimize)
node_vars = np.array([pulp.LpVariable(f"{i}",
lowBound = 0, upBound=1, cat = pulp.LpInteger)
for i in range(n_person)])
print("setting up objective...")
# minimize sum of age
model += pulp.lpSum([node*age for node,age in zip(node_vars, df.Age)])
print("setting up constraints...")
for pred, con in constraints:
lpsum = pulp.lpSum(node*(1.*pred(x)) for node, (_,x) in zip(node_vars, df.iterrows()))
model += lpsum>=con.min
model += lpsum<=con.max
print("solving...")
model.solve()
_get = np.vectorize(pulp.value)
res = _get(node_vars).astype(np.int)
score = model.objective.value()
print(f"minimal score assignment total {score} (mean age = {score/np.sum(res)}) ")
# check and print results
selected = np.array([tuple(df.iterrows())])[0,:,1][res>0]
return res, selected
#==================================================================
if __name__ == '__main__':
df = pd.read_csv("Example_Volunteers.csv")
df = cleanup(df)
# 1. setup predicates and constraints
constraints = setup_constraints(df)
# 2. solve ILP
res, selected = solve_model(df, constraints)
# 3. print result
print(" \n Minimal solution ", "+"*20)
for i,x in enumerate(selected):
cprint(f"{i+1}. {x.First_Name} {x.Last_Name}", "blue")
print(x)
print(" \n Constraints ", "+"*20)
# check and print results
for pred, con in constraints:
actual_val = np.sum([r*(1*pred(x)) for r, (_,x) in zip(res, df.iterrows())])
actual_con = actual_val>=con.min and actual_val<=con.max
col = "green" if actual_con else "red"
cprint(f"{str(pred):40} {con.min:4} <={actual_val:4} <={con.max:4} -> {actual_con}", col)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment