Skip to content

Instantly share code, notes, and snippets.

@jaems33
Last active December 8, 2017 19:52
Show Gist options
  • Save jaems33/2bc2eacd2c066f9caefa3a825e8d6bf5 to your computer and use it in GitHub Desktop.
Save jaems33/2bc2eacd2c066f9caefa3a825e8d6bf5 to your computer and use it in GitHub Desktop.
def expectedReturn(state, action, stateValue):
    """Return the expected return of taking ``action`` in ``state``.

    The result is the (negative) cost of moving cars plus, summed over every
    enumerated combination of rental requests and returns at both locations,
    the probability-weighted rental income and discounted value of the
    resulting next state.

    Args:
        state: Indexable pair; ``state[0]`` and ``state[1]`` are the car
            counts at the first and second location.
        action: Number of cars moved from the first location to the second;
            a negative value moves cars the other way.
        stateValue: Current value estimates, indexed as
            ``stateValue[carsAtLoc1, carsAtLoc2]`` (presumably a 2-D NumPy
            array covering 0..MAX_CARS on each axis -- confirm with caller).

    Returns:
        The accumulated expected return.

    Note:
        Request and return counts are enumerated over
        ``range(POISSON_UPPER_BOUND)`` only; larger counts are dropped from
        the sum. ``action`` is not checked against the cars available, so
        the caller is expected to pass a feasible action.
        ``poisson(n, rate)`` is assumed to be a deterministic function giving
        the probability of observing ``n`` events at the given rate.
    """
    # Moving a car costs the same in either direction, hence the absolute
    # value of the action.
    returns = 0.0
    returns -= COST_OF_MOVING * np.absolute(action)

    # Cars on hand at the start of the day, capped at MAX_CARS.
    carsLoc1 = int(min(state[0] - action, MAX_CARS))
    carsLoc2 = int(min(state[1] + action, MAX_CARS))

    # Each probability depends only on the count and the location's rate, so
    # evaluate each one once here instead of inside the four nested loops.
    counts = range(POISSON_UPPER_BOUND)
    requestProbLoc1 = [poisson(n, EXPECTED_FIRST_LOC_REQUESTS) for n in counts]
    requestProbLoc2 = [poisson(n, EXPECTED_SECOND_LOC_REQUESTS) for n in counts]
    returnProbLoc1 = [poisson(n, EXPECTED_FIRST_LOC_RETURNS) for n in counts]
    returnProbLoc2 = [poisson(n, EXPECTED_SECOND_LOC_RETURNS) for n in counts]

    # Iterate over rental requests at both locations.
    for rentalsLoc1 in counts:
        # Rentals are limited by the cars actually available.
        totalRentalsLoc1 = min(carsLoc1, rentalsLoc1)
        remainingLoc1 = carsLoc1 - totalRentalsLoc1

        for rentalsLoc2 in counts:
            # Joint probability of this pair of request counts.
            rentalsProb = (
                requestProbLoc1[rentalsLoc1] * requestProbLoc2[rentalsLoc2]
            )
            totalRentalsLoc2 = min(carsLoc2, rentalsLoc2)
            remainingLoc2 = carsLoc2 - totalRentalsLoc2

            # Income earned from the cars rented out today.
            rewards = (totalRentalsLoc1 + totalRentalsLoc2) * RENTAL_CREDIT

            # Iterate over returned cars at both locations.
            for returnsLoc1 in counts:
                # Cars at location 1 at the end of the day, capped at MAX_CARS.
                carsLoc1_prime = min(remainingLoc1 + returnsLoc1, MAX_CARS)

                for returnsLoc2 in counts:
                    # Joint probability of these requests and returns; the
                    # multiplication order matches the original expression.
                    prob = (
                        returnProbLoc1[returnsLoc1]
                        * returnProbLoc2[returnsLoc2]
                        * rentalsProb
                    )
                    # Cars at location 2 at the end of the day.
                    carsLoc2_prime = min(remainingLoc2 + returnsLoc2, MAX_CARS)

                    # Accumulate the probability-weighted reward plus the
                    # discounted value of the next state.
                    returns += prob * (
                        rewards
                        + DISCOUNT_RATE
                        * stateValue[carsLoc1_prime, carsLoc2_prime]
                    )
    return returns
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment