This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def monte_carlo_e_soft(env, episodes=100, policy=None, epsilon=0.01): | |
if not policy: | |
policy = create_random_policy(env) # 1. | |
Q = create_state_action_dictionary(env, policy) # 2. | |
returns = {} # 3. | |
for _ in range(episodes): # 4. | |
G = 0 # 5. | |
episode = run_game(env=env, policy=policy, display=False) # 6. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def value_iteration(V_s, theta=0.01, discount_rate=0.5): | |
value_for_state_map = create_value_for_state_map() # 1. | |
delta = 100 # 2. | |
while not delta < theta: # 3. | |
delta = 0 # 4. | |
for state in range(1, 15): # 5. | |
v = V_s[state] # 6. | |
totals = {} # 7. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Two rounds of evaluate-then-improve, starting from the random policy.
# Each round scores the current policy with iterative_policy_evaluation and
# then rebuilds the policy from those state values via create_greedy_policy.
# The dict after each evaluation is the state-value table recorded for it.
policy = create_random_policy()
V_s = iterative_policy_evaluation(policy)  # {0: 0.0, 1: -1.7, 2: -1.9, 3: -1.9, 4: -1.7, 5: -1.9, 6: -1.9, 7: -1.9, 8: -1.9, 9: -1.9, 10: -1.9, 11: -1.7, 12: -1.9, 13: -1.9, 14: -1.7, 15: 0.0}
policy = create_greedy_policy(V_s)
V_s = iterative_policy_evaluation(policy)  # {0: 0.0, 1: -1.0, 2: -1.5, 3: -1.8, 4: -1.0, 5: -1.5, 6: -1.8, 7: -1.5, 8: -1.5, 9: -1.8, 10: -1.5, 11: -1.0, 12: -1.8, 13: -1.5, 14: -1.0, 15: 0.0}
policy = create_greedy_policy(V_s)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def iterative_policy_evaluation(policy, theta=0.01, discount_rate=0.5): | |
V_s = {i: 0 for i in range(16)} # 1. | |
probablitiy_map = create_probability_map() # 2. | |
delta = 100 # 3. | |
while not delta < theta: # 4. | |
delta = 0 # 5. | |
for state in range(16): # 6. | |
v = V_s[state] # 7. | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| state_prime | reward | state | action |
|---|---|---|---|
| 0 | -1 | 2 | 'N' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Random Policy
{0: {'E': 0.0, 'N': 0.0, 'S': 0.0, 'W': 0.0},
1: {'E': 0.25, 'N': 0.25, 'S': 0.25, 'W': 0.25},
2: {'E': 0.25, 'N': 0.25, 'S': 0.25, 'W': 0.25},
...
13: {'E': 0.25, 'N': 0.25, 'S': 0.25, 'W': 0.25},
14: {'E': 0.25, 'N': 0.25, 'S': 0.25, 'W': 0.25},
15: {'E': 0.0, 'N': 0.0, 'S': 0.0, 'W': 0.0}}
// State to State prime
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def agent(policy, starting_position=None): | |
l = list(range(16)) | |
state_to_state_prime = create_state_to_state_prime_verbose_map() | |
agent_position = randint(1, 14) if starting_position is None else starting_position | |
step_number = 1 | |
while not (agent_position == 0 or agent_position == 15): | |
current_policy = policy[agent_position] | |
next_move = random() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-----------------
| X |   |   |   |
-----------------
|   |   |   |   |
-----------------
| A |   |   |   |
-----------------
|   |   |   | X |
-----------------
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class UCB(KBanditSolution): | |
def count_ucb(self, q, c, step, n): | |
if n == 0: | |
return sys.maxsize | |
return (q + (c * sqrt((log(step) / n)))) | |
def solve(self, c): | |
Q = {i: 0 for i in range(k)} # 1. Value function | |
N = {i: 0 for i in range(k)} # 2. Number of actions, for update rule |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class WeightedAverage(KBanditSolution): | |
def solve(self, exploration_rate, step_size, initial_value): | |
Q = {i: initial_value for i in range(k)} # 1. Value function | |
N = {i: 0 for i in range(k)} # 2. Number of actions, for update rule | |
for i in range(self.steps): # 3. Main loop | |
explore = random.uniform(0, 1) < exploration_rate # 4. Exploration | |
if explore: | |
action = random.randint(0, k - 1) # 5. Exploration: Choosing random action |