maweigert/assign.py Secret

## assign.py
""" Selection problem

pip install pulp termcolor

To extend and adopt possible constraints change the following:

1) update LANGUAGES to cover all appearing languages

e.g. add

"AR":["Arabic","Arab"]

if it might appear in the csv


2) update setup_constraints with your custom predicates and constraint numbers

A predicate is simply a
     function(x) -> (True,False)
that takes a dataframe record x as an input and outputs whether the predicate is True for x, e.g.

 p_age = Predicate(lambda x: x.Age>30)

A constraint is a tuple of (predicate, MinMax(min,max) )  where min (max) is the minimal (maximal) number of records to fullfill the predicate, e.g.

    constraints = (
        ( Predicate(lambda x: x.Age>30), MinMax(3,8) ),
       ...
       )


"""


import numpy as np
import pandas as pd
from collections import namedtuple
import pulp
from functools import reduce
from termcolor import colored, cprint

# Definition of languages and potential synonyms

LANGUAGES={
    "DE":["German", "Deutsch"],
    "PE":["Persian"],
    "HI":["Hindi"],
    "RU":["Russian","RUS"],
    "IT":["Italian"],
    "SP":["Spanish","ESP"],
    "FR":["French"],
    "EN":["English"],
}


def cleanup(df):
    for k, v in LANGUAGES.items():
        for val in v:
            df.Languages.replace({f"(^|[^a-zA-Z])({val}?)($|[^a-zA-Z])":f"\\1{k}\\3"}, regex =True, inplace = True)
    df = df.rename(columns=dict((k,k.strip().replace(" ","_")) for k in df.columns))
    return df

def _and(x,y):
    return x and y

def _or(x,y):
    return x or y

def contains(s,elements, reduce_func=_and):
    """helper function, checks for all strings in <elements> if they are in s
    and returns depending on reduce_func e.g. True if for all/one/some this is True"""
    if isinstance(elements, str):
        elements = (elements,)
    return reduce(reduce_func,(el.lower() in str(s).lower() for el in elements))

class Predicate(object):
    def __init__(self, func, name = "predicate"):
        self.func = func
        self.name = name
    def __call__(self, x):
        return self.func(x)
    def __and__(self, other):
        return Predicate(lambda x: self(x) and other(x), name = f"{self} AND {other}")
    def __or__(self, other):
        return Predicate(lambda x: self(x) or other(x), name = f"{self} OR {other}")
    def __invert__(self):
        return Predicate(lambda x: not self(x), name = f"NOT {self}")
    def __repr__(self):
        return f"{self.name}"

MinMax = namedtuple("MinMax",["min","max"])


def setup_constraints(df):
    # PREDICATES DEFINITION ============

    # Predicate(func, name)
    # func: function accepting a Dataframe record and returning True or False, e.g.
    # p = Predicate(lambda x: x.Age>30)
    # predicates can also be logically combined (with &, |, and ~), e.g
    # p1 = Predicate(lambda x: x.Age>30)
    # p2 = Predicate(lambda x: contains(x.Languages,"EN")
    # p3 = p1 & p2


    n_person = len(df)

    p_driver = Predicate(
        lambda x: contains(x.Drivers_License,("Stick","Automatic"),_or),
        name = "driver"
    )
    p_driver_stick = Predicate(lambda x: contains(x.Drivers_License,"Stick"),
                               name="driver with stick")

    p_langs = {
        k:Predicate(lambda x, k=k: contains(x.Languages,k), name = f"lang_{k}") for k in LANGUAGES.keys()
    }

    p_elderly = Predicate(lambda x: x.Age>=60, name = "older")

    p_driver_EN = p_driver & p_langs["EN"]

    # CONSTRAINT DEFINITION ============

    # a list of tuples  (predicate, MinMax(min_number, max_number) )
    constraints = (
        (p_driver, MinMax(10,n_person)),
        (p_driver_stick, MinMax(7,n_person)),
        (p_driver & p_langs["EN"], MinMax(2,n_person)),
        (p_langs["DE"] & p_langs["RU"] , MinMax(1,n_person)),
        (p_langs["DE"] & p_langs["PE"] , MinMax(1,n_person)),
        (p_langs["DE"] & p_langs["SP"] , MinMax(1,n_person)),
        (p_langs["DE"] & p_langs["FR"] , MinMax(1,n_person)),
        (p_elderly , MinMax(0,5)),
    )
    return constraints

def solve_model(df, constraints):

    # ILP MODEL ============

    n_person = len(df)

    model = pulp.LpProblem("Matching", pulp.LpMinimize)

    node_vars = np.array([pulp.LpVariable(f"{i}",
                        lowBound = 0, upBound=1, cat = pulp.LpInteger)
                          for i in range(n_person)])

    print("setting up objective...")
    # minimize sum of age
    model += pulp.lpSum([node*age for node,age in zip(node_vars, df.Age)])

    print("setting up constraints...")

    for pred, con in constraints:
        lpsum = pulp.lpSum(node*(1.*pred(x)) for node, (_,x) in zip(node_vars, df.iterrows()))
        model += lpsum>=con.min
        model += lpsum<=con.max


    print("solving...")

    model.solve()

    _get = np.vectorize(pulp.value)
    res = _get(node_vars).astype(np.int)

    score  = model.objective.value()
    print(f"minimal score assignment total {score} (mean age = {score/np.sum(res)}) ")

    # check and print results
    selected = np.array([tuple(df.iterrows())])[0,:,1][res>0]
    return res, selected


#==================================================================

if __name__ == '__main__':


    df = pd.read_csv("Example_Volunteers.csv")

    df = cleanup(df)

    # 1. setup predicates and constraints
    constraints = setup_constraints(df)

    # 2. solve ILP
    res, selected = solve_model(df, constraints)

    # 3. print result
    print(" \n Minimal solution ", "+"*20)
    for i,x in enumerate(selected):
        cprint(f"{i+1}. {x.First_Name} {x.Last_Name}", "blue")
        print(x)

    print(" \n Constraints ",  "+"*20)
    # check and print results
    for pred, con in constraints:
        actual_val = np.sum([r*(1*pred(x)) for r, (_,x) in zip(res, df.iterrows())])
        actual_con = actual_val>=con.min and actual_val<=con.max
        col = "green" if actual_con else "red"
        cprint(f"{str(pred):40} {con.min:4} <={actual_val:4} <={con.max:4}  ->  {actual_con}", col)
	""" Selection problem

	pip install pulp termcolor

	To extend and adopt possible constraints change the following:

	1) update LANGUAGES to cover all appearing languages

	e.g. add

	"AR":["Arabic","Arab"]

	if it might appear in the csv


	2) update setup_constraints with your custom predicates and constraint numbers

	A predicate is simply a
	function(x) -> (True,False)
	that takes a dataframe record x as an input and outputs whether the predicate is True for x, e.g.

	p_age = Predicate(lambda x: x.Age>30)

	A constraint is a tuple of (predicate, MinMax(min,max) ) where min (max) is the minimal (maximal) number of records to fullfill the predicate, e.g.

	constraints = (
	( Predicate(lambda x: x.Age>30), MinMax(3,8) ),
	...
	)


	"""


	import numpy as np
	import pandas as pd
	from collections import namedtuple
	import pulp
	from functools import reduce
	from termcolor import colored, cprint

	# Definition of languages and potential synonyms

	LANGUAGES={
	"DE":["German", "Deutsch"],
	"PE":["Persian"],
	"HI":["Hindi"],
	"RU":["Russian","RUS"],
	"IT":["Italian"],
	"SP":["Spanish","ESP"],
	"FR":["French"],
	"EN":["English"],
	}


	def cleanup(df):
	for k, v in LANGUAGES.items():
	for val in v:
	df.Languages.replace({f"(^\|[^a-zA-Z])({val}?)($\|[^a-zA-Z])":f"\\1{k}\\3"}, regex =True, inplace = True)
	df = df.rename(columns=dict((k,k.strip().replace(" ","_")) for k in df.columns))
	return df

	def _and(x,y):
	return x and y

	def _or(x,y):
	return x or y

	def contains(s,elements, reduce_func=_and):
	"""helper function, checks for all strings in <elements> if they are in s
	and returns depending on reduce_func e.g. True if for all/one/some this is True"""
	if isinstance(elements, str):
	elements = (elements,)
	return reduce(reduce_func,(el.lower() in str(s).lower() for el in elements))

	class Predicate(object):
	def __init__(self, func, name = "predicate"):
	self.func = func
	self.name = name
	def __call__(self, x):
	return self.func(x)
	def __and__(self, other):
	return Predicate(lambda x: self(x) and other(x), name = f"{self} AND {other}")
	def __or__(self, other):
	return Predicate(lambda x: self(x) or other(x), name = f"{self} OR {other}")
	def __invert__(self):
	return Predicate(lambda x: not self(x), name = f"NOT {self}")
	def __repr__(self):
	return f"{self.name}"

	MinMax = namedtuple("MinMax",["min","max"])


	def setup_constraints(df):
	# PREDICATES DEFINITION ============

	# Predicate(func, name)
	# func: function accepting a Dataframe record and returning True or False, e.g.
	# p = Predicate(lambda x: x.Age>30)
	# predicates can also be logically combined (with &, \|, and ~), e.g
	# p1 = Predicate(lambda x: x.Age>30)
	# p2 = Predicate(lambda x: contains(x.Languages,"EN")
	# p3 = p1 & p2


	n_person = len(df)

	p_driver = Predicate(
	lambda x: contains(x.Drivers_License,("Stick","Automatic"),_or),
	name = "driver"
	)
	p_driver_stick = Predicate(lambda x: contains(x.Drivers_License,"Stick"),
	name="driver with stick")

	p_langs = {
	k:Predicate(lambda x, k=k: contains(x.Languages,k), name = f"lang_{k}") for k in LANGUAGES.keys()
	}

	p_elderly = Predicate(lambda x: x.Age>=60, name = "older")

	p_driver_EN = p_driver & p_langs["EN"]

	# CONSTRAINT DEFINITION ============

	# a list of tuples (predicate, MinMax(min_number, max_number) )
	constraints = (
	(p_driver, MinMax(10,n_person)),
	(p_driver_stick, MinMax(7,n_person)),
	(p_driver & p_langs["EN"], MinMax(2,n_person)),
	(p_langs["DE"] & p_langs["RU"] , MinMax(1,n_person)),
	(p_langs["DE"] & p_langs["PE"] , MinMax(1,n_person)),
	(p_langs["DE"] & p_langs["SP"] , MinMax(1,n_person)),
	(p_langs["DE"] & p_langs["FR"] , MinMax(1,n_person)),
	(p_elderly , MinMax(0,5)),
	)
	return constraints

	def solve_model(df, constraints):

	# ILP MODEL ============

	n_person = len(df)

	model = pulp.LpProblem("Matching", pulp.LpMinimize)

	node_vars = np.array([pulp.LpVariable(f"{i}",
	lowBound = 0, upBound=1, cat = pulp.LpInteger)
	for i in range(n_person)])

	print("setting up objective...")
	# minimize sum of age
	model += pulp.lpSum([node*age for node,age in zip(node_vars, df.Age)])

	print("setting up constraints...")

	for pred, con in constraints:
	lpsum = pulp.lpSum(node(1.pred(x)) for node, (_,x) in zip(node_vars, df.iterrows()))
	model += lpsum>=con.min
	model += lpsum<=con.max


	print("solving...")

	model.solve()

	_get = np.vectorize(pulp.value)
	res = _get(node_vars).astype(np.int)

	score = model.objective.value()
	print(f"minimal score assignment total {score} (mean age = {score/np.sum(res)}) ")

	# check and print results
	selected = np.array([tuple(df.iterrows())])[0,:,1][res>0]
	return res, selected



	#==================================================================

	if __name__ == '__main__':


	df = pd.read_csv("Example_Volunteers.csv")

	df = cleanup(df)

	# 1. setup predicates and constraints
	constraints = setup_constraints(df)

	# 2. solve ILP
	res, selected = solve_model(df, constraints)

	# 3. print result
	print(" \n Minimal solution ", "+"*20)
	for i,x in enumerate(selected):
	cprint(f"{i+1}. {x.First_Name} {x.Last_Name}", "blue")
	print(x)

	print(" \n Constraints ", "+"*20)
	# check and print results
	for pred, con in constraints:
	actual_val = np.sum([r(1pred(x)) for r, (_,x) in zip(res, df.iterrows())])
	actual_con = actual_val>=con.min and actual_val<=con.max
	col = "green" if actual_con else "red"
	cprint(f"{str(pred):40} {con.min:4} <={actual_val:4} <={con.max:4} -> {actual_con}", col)