DRMacIver/knapsack.py

## knapsack.py
"""
This is an example of how to use Hypothesis to test a classic combinatorial
optimisation problem without having a reference implementation to compare
against.

The problem we're going to look at is the knapsack packing problem: Given a
set of objects with value and weight, how can maximize the total value while
keeping the total weight under a certain amount.

You can solve this exactly as an integer linear programming without too much
difficulty, but we're instead going to look at a straightforward greedy
solution and test it to breaking point.
"""


def solve_knapsack(items, capacity):
    """
    Takes a list of pairs of positive integers (value, size) and a
    positive integer capacity and returns a list of pairs drawn from
    the list which try to maximize the value.
    """
    result = []
    current_fill = 0
    while True:
        items = [
            item for item in items if current_fill + item[1] <= capacity
        ]
        assert all(item[1] <= capacity for item in items)
        if not items:
            break
        # Pick the item with the highest value per unit weight, breaking ties
        # by picking the more valuable of the two.
        chosen = max(items, key=lambda x: x[0] / x[1])
        items.remove(chosen)
        result.append(chosen)
        current_fill += chosen[1]
        assert current_fill <= capacity
    return result


def score_solution(solution):
    """Return the total value of a solution to the knapsack problem"""
    return sum(item[0] for item in solution)


import unittest
from hypothesis import given, assume
import hypothesis.strategies as st


# This will be the types of data that we want to feed to our algorithm to
# test it: Our items are tuples of positive integers, and our capacities
# are integers.
Item = st.tuples(st.integers(min_value=1), st.integers(min_value=1))
ItemSet = st.lists(Item)
Capacity = st.integers(min_value=1)


class TestKnapsack(unittest.TestCase):
    @given(ItemSet, Capacity)
    def test_returns_a_fitting_result(self, items, capacity):
        """Our first test is the most straightforward and is very important:
        We test that for any input our desired output meets the constraints
        we're trying to satisfy."""
        result = solve_knapsack(items, capacity)
        self.assertLessEqual(
            sum(size for value, size in result),
            capacity
        )

    @given(ItemSet, Capacity)
    def test_returns_a_non_empty_result_if_any_fit(self, items, capacity):
        """Ensure that if a solution is possible then one is found. i.e. if
        any of the items fit we don't return an empty solution"""
        assume(any(item[1] <= capacity for item in items))
        result = solve_knapsack(items, capacity)
        self.assertGreater(len(result), 0)

    @given(ItemSet, Capacity)
    def test_is_independent_of_order(self, items, capacity):
        """This tests that we are not vulnerable to the initial ordering of the
        items."""
        result = solve_knapsack(items, capacity)
        items.reverse()
        result2 = solve_knapsack(items, capacity)
        self.assertEqual(score_solution(result), score_solution(result2))

    @given(ItemSet, Capacity, Capacity)
    def test_raising_capacity_cannot_worsen_solution(self, items, c1, c2):
        """If the capacity is increased, we can reuse the current solution, so
        an optimal solution cannot be worse than the current one."""
        assume(c1 != c2)
        c1, c2 = sorted((c1, c2))
        result1 = solve_knapsack(items, c1)
        result2 = solve_knapsack(items, c2)
        self.assertLessEqual(score_solution(result1), score_solution(result2))

    @given(ItemSet, Capacity)
    def test_increasing_score_of_chosen_item_improves_things(
        self, items, capacity
    ):
        """This test is more interesting. What we're doing here is deliberately
        altering the problem so we know that there is a solution strictly
        better than the current one and seeing if the algorithm can find it.
        By taking one of the items in our allegedly optimal solution and
        increasing its value, that provides a solution of strictly higher value
        than the current one. Note that we do not replace the original item, so
        the current solution remains viable. This is important as it gives the
        optimiser a solution to get distracted by.

        This falsifies with items=[(1, 2), (2, 4)], capacity=6
        """
        assume(any(item[1] <= capacity for item in items))
        result = solve_knapsack(items, capacity)
        assert result
        for item in result:
            new_items = list(items)
            new_items.append((item[0] + 1, item[1]))
            new_result = solve_knapsack(new_items, capacity)
            self.assertGreater(
                score_solution(new_result),
                score_solution(result))

    @given(ItemSet, Capacity)
    def test_increasing_weight_of_chosen_item_does_not_improve_things(
        self, items, capacity
    ):
        """In this test we're doing the opposite of the previous one: We take
        an item that was in the optimal solution and make it less appealing by
        increasing its weight slightly. This doesn't have to strictly hurt -
        e.g. there could be an identical item available to replace it - but it
        shouldn't be able to *help*.

        This falsifies with items=[(1, 1), (5, 10)], capacity=10).
        """
        assume(any(item[1] <= capacity for item in items))
        result = solve_knapsack(items, capacity)
        assert result
        for item in result:
            new_items = list(items)
            new_items.remove(item)
            new_items.append((item[0], item[1] + 1))
            new_result = solve_knapsack(new_items, capacity)
            self.assertLessEqual(
                score_solution(new_result), score_solution(result))

    @given(ItemSet, Capacity)
    def test_removing_a_chosen_item_does_not_improve_matters(
        self, items, capacity
    ):
        """This takes one of the items
        in our optimal solution, removes it from the list and reruns the
        process. This means that any solution to the new input would also
        be a solution to the old input, so it shouldn't be able to improve
        matters.

        This falsifies with items=[(2, 7), (1, 1)], capacity=7)
        """
        result = solve_knapsack(items, capacity)
        score = score_solution(result)
        for item in result:
            new_items = list(items)
            new_items.remove(item)
            new_result = solve_knapsack(new_items, capacity)
            self.assertLessEqual(score_solution(new_result), score)


if __name__ == '__main__':
    unittest.main()
	"""
	This is an example of how to use Hypothesis to test a classic combinatorial
	optimisation problem without having a reference implementation to compare
	against.

	The problem we're going to look at is the knapsack packing problem: Given a
	set of objects with value and weight, how can maximize the total value while
	keeping the total weight under a certain amount.

	You can solve this exactly as an integer linear programming without too much
	difficulty, but we're instead going to look at a straightforward greedy
	solution and test it to breaking point.
	"""


	def solve_knapsack(items, capacity):
	"""
	Takes a list of pairs of positive integers (value, size) and a
	positive integer capacity and returns a list of pairs drawn from
	the list which try to maximize the value.
	"""
	result = []
	current_fill = 0
	while True:
	items = [
	item for item in items if current_fill + item[1] <= capacity
	]
	assert all(item[1] <= capacity for item in items)
	if not items:
	break
	# Pick the item with the highest value per unit weight, breaking ties
	# by picking the more valuable of the two.
	chosen = max(items, key=lambda x: x[0] / x[1])
	items.remove(chosen)
	result.append(chosen)
	current_fill += chosen[1]
	assert current_fill <= capacity
	return result


	def score_solution(solution):
	"""Return the total value of a solution to the knapsack problem"""
	return sum(item[0] for item in solution)


	import unittest
	from hypothesis import given, assume
	import hypothesis.strategies as st


	# This will be the types of data that we want to feed to our algorithm to
	# test it: Our items are tuples of positive integers, and our capacities
	# are integers.
	Item = st.tuples(st.integers(min_value=1), st.integers(min_value=1))
	ItemSet = st.lists(Item)
	Capacity = st.integers(min_value=1)


	class TestKnapsack(unittest.TestCase):
	@given(ItemSet, Capacity)
	def test_returns_a_fitting_result(self, items, capacity):
	"""Our first test is the most straightforward and is very important:
	We test that for any input our desired output meets the constraints
	we're trying to satisfy."""
	result = solve_knapsack(items, capacity)
	self.assertLessEqual(
	sum(size for value, size in result),
	capacity
	)

	@given(ItemSet, Capacity)
	def test_returns_a_non_empty_result_if_any_fit(self, items, capacity):
	"""Ensure that if a solution is possible then one is found. i.e. if
	any of the items fit we don't return an empty solution"""
	assume(any(item[1] <= capacity for item in items))
	result = solve_knapsack(items, capacity)
	self.assertGreater(len(result), 0)

	@given(ItemSet, Capacity)
	def test_is_independent_of_order(self, items, capacity):
	"""This tests that we are not vulnerable to the initial ordering of the
	items."""
	result = solve_knapsack(items, capacity)
	items.reverse()
	result2 = solve_knapsack(items, capacity)
	self.assertEqual(score_solution(result), score_solution(result2))

	@given(ItemSet, Capacity, Capacity)
	def test_raising_capacity_cannot_worsen_solution(self, items, c1, c2):
	"""If the capacity is increased, we can reuse the current solution, so
	an optimal solution cannot be worse than the current one."""
	assume(c1 != c2)
	c1, c2 = sorted((c1, c2))
	result1 = solve_knapsack(items, c1)
	result2 = solve_knapsack(items, c2)
	self.assertLessEqual(score_solution(result1), score_solution(result2))

	@given(ItemSet, Capacity)
	def test_increasing_score_of_chosen_item_improves_things(
	self, items, capacity
	):
	"""This test is more interesting. What we're doing here is deliberately
	altering the problem so we know that there is a solution strictly
	better than the current one and seeing if the algorithm can find it.
	By taking one of the items in our allegedly optimal solution and
	increasing its value, that provides a solution of strictly higher value
	than the current one. Note that we do not replace the original item, so
	the current solution remains viable. This is important as it gives the
	optimiser a solution to get distracted by.

	This falsifies with items=[(1, 2), (2, 4)], capacity=6
	"""
	assume(any(item[1] <= capacity for item in items))
	result = solve_knapsack(items, capacity)
	assert result
	for item in result:
	new_items = list(items)
	new_items.append((item[0] + 1, item[1]))
	new_result = solve_knapsack(new_items, capacity)
	self.assertGreater(
	score_solution(new_result),
	score_solution(result))

	@given(ItemSet, Capacity)
	def test_increasing_weight_of_chosen_item_does_not_improve_things(
	self, items, capacity
	):
	"""In this test we're doing the opposite of the previous one: We take
	an item that was in the optimal solution and make it less appealing by
	increasing its weight slightly. This doesn't have to strictly hurt -
	e.g. there could be an identical item available to replace it - but it
	shouldn't be able to help.

	This falsifies with items=[(1, 1), (5, 10)], capacity=10).
	"""
	assume(any(item[1] <= capacity for item in items))
	result = solve_knapsack(items, capacity)
	assert result
	for item in result:
	new_items = list(items)
	new_items.remove(item)
	new_items.append((item[0], item[1] + 1))
	new_result = solve_knapsack(new_items, capacity)
	self.assertLessEqual(
	score_solution(new_result), score_solution(result))

	@given(ItemSet, Capacity)
	def test_removing_a_chosen_item_does_not_improve_matters(
	self, items, capacity
	):
	"""This takes one of the items
	in our optimal solution, removes it from the list and reruns the
	process. This means that any solution to the new input would also
	be a solution to the old input, so it shouldn't be able to improve
	matters.

	This falsifies with items=[(2, 7), (1, 1)], capacity=7)
	"""
	result = solve_knapsack(items, capacity)
	score = score_solution(result)
	for item in result:
	new_items = list(items)
	new_items.remove(item)
	new_result = solve_knapsack(new_items, capacity)
	self.assertLessEqual(score_solution(new_result), score)


	if __name__ == '__main__':
	unittest.main()