futurulus/car.py

## car.py
from __future__ import print_function
import random


STRATEGY = {}

def expected_value(seen, choice):
    '''
    The expected amount you get swindled, given what you've seen so
    far (`seen`, adjusted so that the lowest value is 0) and the
    choice you make ('hit' or 'stay').
    '''
    if choice == 'stay':
        return mean([offset + seen[-1] for offset in possible_offsets(seen)])
    elif choice == 'hit':
        return mean([expected_value(n, optimal_choice(n))
                     for n in possible_nexts(seen)])


def optimal_choice(seen):
    '''
    The best choice ('hit' or 'stay') to make given what you've seen
    so far. Results are stored in STRATEGY to avoid duplicating work.
    '''
    seen = tuple(sorted(seen[:-1]) + seen[-1:])
    if len(seen) == 5:
        return 'stay'
    elif seen in STRATEGY:
        return STRATEGY[seen]
    else:
        hit = expected_value(seen, 'hit')
        stay = expected_value(seen, 'stay')
        if hit < stay:
            choice = 'hit'
            better = hit
            worse = stay
        else:
            choice = 'stay'
            better = stay
            worse = hit

    print('%4s %s (%s < %s)' % (choice, seen, better, worse))
    STRATEGY[seen] = choice
    return choice


def possible_offsets(seen):
    '''
    All possible values of N, reported as how much less they are than
    the lowest price you've seen. The lowest value in `seen` should be 0.

    For example:

    >>> possible_offsets([0, 200])
    [0, 100, 200]
    '''
    if seen:
        return range(0, 401 - max(seen), 100)
    else:
        return [0]


def possible_nexts(seen):
    '''
    All possible sequences you could next encounter, given what you've seen
    so far. Each sequence is readjusted so that the lowest value is 0.
    A sequence may be reported more than once; the probability of each
    sequence is proportional to how often it appears.

    For example:

    >>> list(possible_nexts([0, 200]))  # doctest: +NORMALIZE_WHITESPACE
    [[0, 200, 100], [0, 200, 300], [0, 200, 400],
     [100, 300, 0], [0, 200, 100], [0, 200, 300],
     [200, 400, 0], [100, 300, 0], [0, 200, 100]]
    '''
    result = []
    for offset in possible_offsets(seen):
        actual_seen = [s + offset for s in seen]
        for remaining in sorted(set(range(0, 401, 100)) - set(actual_seen)):
            actual_next = actual_seen + [remaining]
            next_offset = min(actual_next)
            yield [a - next_offset for a in actual_next]


def run_simulation():
    '''
    Run a Monte Carlo simulation to test the strategy and verify its
    expected value.
    '''
    true_n = 42

    total_swindle = 0.0
    num_draws = 0

    while True:
        # Simulate one random ordering of prices.
        draw = range(true_n, true_n + 401, 100)
        random.shuffle(draw)

        # Apply the computed strategy. Note that true_n is not used here.
        seen = []
        for price in draw:
            seen.append(price)
            seen_relative = [s - min(seen) for s in seen]
            if optimal_choice(seen_relative) == 'stay':
                break

        # Figure out the results and add them to the tally
        total_swindle += price - true_n
        num_draws += 1

        print('Ran %d simulations; Average swindle = %s\r' %
              (num_draws, total_swindle / num_draws), end='')


def mean(vals):
    return sum(vals) / float(len(vals))


if __name__ == '__main__':
    best_expected = expected_value([], 'hit')
    print('Best expected: %s' % best_expected)

    run_simulation()
	from __future__ import print_function
	import random


	STRATEGY = {}

	def expected_value(seen, choice):
	'''
	The expected amount you get swindled, given what you've seen so
	far (`seen`, adjusted so that the lowest value is 0) and the
	choice you make ('hit' or 'stay').
	'''
	if choice == 'stay':
	return mean([offset + seen[-1] for offset in possible_offsets(seen)])
	elif choice == 'hit':
	return mean([expected_value(n, optimal_choice(n))
	for n in possible_nexts(seen)])


	def optimal_choice(seen):
	'''
	The best choice ('hit' or 'stay') to make given what you've seen
	so far. Results are stored in STRATEGY to avoid duplicating work.
	'''
	seen = tuple(sorted(seen[:-1]) + seen[-1:])
	if len(seen) == 5:
	return 'stay'
	elif seen in STRATEGY:
	return STRATEGY[seen]
	else:
	hit = expected_value(seen, 'hit')
	stay = expected_value(seen, 'stay')
	if hit < stay:
	choice = 'hit'
	better = hit
	worse = stay
	else:
	choice = 'stay'
	better = stay
	worse = hit

	print('%4s %s (%s < %s)' % (choice, seen, better, worse))
	STRATEGY[seen] = choice
	return choice


	def possible_offsets(seen):
	'''
	All possible values of N, reported as how much less they are than
	the lowest price you've seen. The lowest value in `seen` should be 0.

	For example:

	>>> possible_offsets([0, 200])
	[0, 100, 200]
	'''
	if seen:
	return range(0, 401 - max(seen), 100)
	else:
	return [0]


	def possible_nexts(seen):
	'''
	All possible sequences you could next encounter, given what you've seen
	so far. Each sequence is readjusted so that the lowest value is 0.
	A sequence may be reported more than once; the probability of each
	sequence is proportional to how often it appears.

	For example:

	>>> list(possible_nexts([0, 200])) # doctest: +NORMALIZE_WHITESPACE
	[[0, 200, 100], [0, 200, 300], [0, 200, 400],
	[100, 300, 0], [0, 200, 100], [0, 200, 300],
	[200, 400, 0], [100, 300, 0], [0, 200, 100]]
	'''
	result = []
	for offset in possible_offsets(seen):
	actual_seen = [s + offset for s in seen]
	for remaining in sorted(set(range(0, 401, 100)) - set(actual_seen)):
	actual_next = actual_seen + [remaining]
	next_offset = min(actual_next)
	yield [a - next_offset for a in actual_next]


	def run_simulation():
	'''
	Run a Monte Carlo simulation to test the strategy and verify its
	expected value.
	'''
	true_n = 42

	total_swindle = 0.0
	num_draws = 0

	while True:
	# Simulate one random ordering of prices.
	draw = range(true_n, true_n + 401, 100)
	random.shuffle(draw)

	# Apply the computed strategy. Note that true_n is not used here.
	seen = []
	for price in draw:
	seen.append(price)
	seen_relative = [s - min(seen) for s in seen]
	if optimal_choice(seen_relative) == 'stay':
	break

	# Figure out the results and add them to the tally
	total_swindle += price - true_n
	num_draws += 1

	print('Ran %d simulations; Average swindle = %s\r' %
	(num_draws, total_swindle / num_draws), end='')


	def mean(vals):
	return sum(vals) / float(len(vals))


	if __name__ == '__main__':
	best_expected = expected_value([], 'hit')
	print('Best expected: %s' % best_expected)

	run_simulation()