Jeremi Kaczmarczyk (jknthn)

def monte_carlo_e_soft(env, episodes=100, policy=None, epsilon=0.01):
    if not policy:
        policy = create_random_policy(env)  # 1. Start from an equiprobable random policy
    Q = create_state_action_dictionary(env, policy)  # 2. Empty dictionary of state-action values
    returns = {}  # 3. Lists of observed returns for each state-action pair
    for _ in range(episodes):  # 4. Main loop over episodes
        G = 0  # 5. Cumulative return collected along the episode
        episode = run_game(env=env, policy=policy, display=False)  # 6. Play one episode under the current policy
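        # Sketch (assumption, not the original gist): walk the episode backwards,
        # accumulate the return G and update Q with the average of first-visit returns.
        # run_game is assumed to return a list of [state, action, reward] steps.
        for i in reversed(range(len(episode))):
            s_t, a_t, r_t = episode[i]
            state_action = (s_t, a_t)
            G += r_t
            if state_action not in [(x[0], x[1]) for x in episode[:i]]:  # first visit only
                returns.setdefault(state_action, []).append(G)
                Q[s_t][a_t] = sum(returns[state_action]) / len(returns[state_action])
                # epsilon-soft improvement: mostly greedy w.r.t. Q, epsilon spread over all actions
                best_a = max(Q[s_t], key=Q[s_t].get)
                n_actions = len(policy[s_t])
                for a in policy[s_t]:
                    policy[s_t][a] = 1 - epsilon + epsilon / n_actions if a == best_a else epsilon / n_actions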
def value_iteration(V_s, theta=0.01, discount_rate=0.5):
    value_for_state_map = create_value_for_state_map()  # 1. Transition and reward information for every state-action pair
    delta = 100  # 2. Start with a large delta so the loop runs at least once
    while not delta < theta:  # 3. Sweep until the largest update is smaller than theta
        delta = 0  # 4. Reset the largest observed change for this sweep
        for state in range(1, 15):  # 5. Skip the terminal corner states 0 and 15
            v = V_s[state]  # 6. Remember the old value of the state
            totals = {}  # 7. Value of each action from this state
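            # Sketch (assumption, not the original gist): Bellman optimality backup.
            # value_for_state_map is assumed to map (state_prime, reward, state, action)
            # to a transition probability, matching the columns of p.csv below.
            for (s_prime, reward, s, a), p in value_for_state_map.items():
                if s == state:
                    totals[a] = totals.get(a, 0) + p * (reward + discount_rate * V_s[s_prime])
            best_value = max(totals.values())
            delta = max(delta, abs(v - best_value))
            V_s[state] = best_value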
policy = create_random_policy()
V_s = iterative_policy_evaluation(policy) # {0: 0.0, 1: -1.7, 2: -1.9, 3: -1.9, 4: -1.7, 5: -1.9, 6: -1.9, 7: -1.9, 8: -1.9, 9: -1.9, 10: -1.9, 11: -1.7, 12: -1.9, 13: -1.9, 14: -1.7, 15: 0.0}
policy = create_greedy_policy(V_s)
V_s = iterative_policy_evaluation(policy) # {0: 0.0, 1: -1.0, 2: -1.5, 3: -1.8, 4: -1.0, 5: -1.5, 6: -1.8, 7: -1.5, 8: -1.5, 9: -1.8, 10: -1.5, 11: -1.0, 12: -1.8, 13: -1.5, 14: -1.0, 15: 0.0}
policy = create_greedy_policy(V_s)
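The create_greedy_policy helper is not shown in the preview; a minimal sketch of what it could look like, assuming a create_state_to_state_prime_verbose_map() helper that maps each state and action to its successor, a reward of -1 per move, and the same discount_rate of 0.5 used by iterative_policy_evaluation below:

def create_greedy_policy(V_s, discount_rate=0.5):
    # Sketch (assumption): act greedily with respect to the current value function.
    state_to_state_prime = create_state_to_state_prime_verbose_map()
    policy = {}
    for state in range(16):
        if state in (0, 15):  # terminal states keep zero probability for every action
            policy[state] = {a: 0.0 for a in 'NESW'}
            continue
        action_values = {a: -1 + discount_rate * V_s[state_to_state_prime[state][a]]
                         for a in 'NESW'}
        best_action = max(action_values, key=action_values.get)
        policy[state] = {a: 1.0 if a == best_action else 0.0 for a in 'NESW'}
    return policy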
def iterative_policy_evaluation(policy, theta=0.01, discount_rate=0.5):
    V_s = {i: 0 for i in range(16)}  # 1. Initialize the value of every state to 0
    probability_map = create_probability_map()  # 2. Transition probabilities and rewards for every state-action pair
    delta = 100  # 3. Start with a large delta so the loop runs at least once
    while not delta < theta:  # 4. Sweep until the largest update is smaller than theta
        delta = 0  # 5. Reset the largest observed change for this sweep
        for state in range(16):  # 6. Back up every state
            v = V_s[state]  # 7. Remember the old value of the state
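            # Sketch (assumption, not the original gist): Bellman expectation backup.
            # probability_map is assumed to map (state_prime, reward, state, action)
            # to a transition probability, matching the columns of p.csv below.
            total = 0
            for action, action_prob in policy[state].items():
                for (s_prime, reward, s, a), p in probability_map.items():
                    if s == state and a == action:
                        total += action_prob * p * (reward + discount_rate * V_s[s_prime])
            delta = max(delta, abs(v - total))
            V_s[state] = total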
jknthn / p.csv (Created February 26, 2018 06:30)
state_prime  reward  state  action
0            -1      2      'N'
jknthn / data.txt (Last active February 26, 2018 06:06)
// Random Policy
{0: {'E': 0.0, 'N': 0.0, 'S': 0.0, 'W': 0.0},
1: {'E': 0.25, 'N': 0.25, 'S': 0.25, 'W': 0.25},
2: {'E': 0.25, 'N': 0.25, 'S': 0.25, 'W': 0.25},
...
13: {'E': 0.25, 'N': 0.25, 'S': 0.25, 'W': 0.25},
14: {'E': 0.25, 'N': 0.25, 'S': 0.25, 'W': 0.25},
15: {'E': 0.0, 'N': 0.0, 'S': 0.0, 'W': 0.0}}
// State to State prime
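The state-to-state-prime data itself is cut off in the preview; for a 4x4 grid numbered 0 through 15 row by row, the mapping that agent.py below relies on could be built roughly like this (a sketch under that assumption, not the author's exact helper):

def create_state_to_state_prime_verbose_map():
    # Sketch (assumption): {state: {action: next_state}}, staying in place at the edges.
    mapping = {}
    for state in range(16):
        row, col = divmod(state, 4)
        mapping[state] = {
            'N': state - 4 if row > 0 else state,
            'S': state + 4 if row < 3 else state,
            'W': state - 1 if col > 0 else state,
            'E': state + 1 if col < 3 else state,
        }
    return mapping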
jknthn / agent.py (Last active February 26, 2018 05:30)
from random import randint, random

def agent(policy, starting_position=None):
    l = list(range(16))  # Grid states 0-15
    state_to_state_prime = create_state_to_state_prime_verbose_map()  # {state: {action: next_state}}
    agent_position = randint(1, 14) if starting_position is None else starting_position  # Random non-terminal start
    step_number = 1
    while not (agent_position == 0 or agent_position == 15):  # States 0 and 15 are terminal
        current_policy = policy[agent_position]
        next_move = random()  # Uniform draw used to sample an action from the policy
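        # Sketch (assumption, not the original gist): map the uniform draw onto the
        # policy's action probabilities and move to the corresponding next state.
        lower_bound = 0
        for action, prob in current_policy.items():
            if prob > 0 and lower_bound <= next_move < lower_bound + prob:
                agent_position = state_to_state_prime[agent_position][action]
                break
            lower_bound += prob
        step_number += 1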
-----------------
| X | | | |
-----------------
| | | | |
-----------------
| A | | | |
-----------------
| | | | X |
-----------------
import sys
from math import log, sqrt

class UCB(KBanditSolution):  # KBanditSolution and k (number of arms) are defined elsewhere in the gist
    def count_ucb(self, q, c, step, n):
        if n == 0:
            return sys.maxsize  # Force untried arms to be selected first
        return q + c * sqrt(log(step) / n)  # Value estimate plus upper confidence bound

    def solve(self, c):
        Q = {i: 0 for i in range(k)}  # 1. Value function
        N = {i: 0 for i in range(k)}  # 2. Number of actions, for update rule
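        # Sketch (assumption, not the original gist): main loop, picking the arm with
        # the highest UCB score. self.steps and self.take_action(action) are assumed
        # to come from the KBanditSolution base class.
        for step in range(1, self.steps + 1):
            scores = {a: self.count_ucb(Q[a], c, step, N[a]) for a in range(k)}
            action = max(scores, key=scores.get)
            reward = self.take_action(action)
            N[action] += 1
            Q[action] += (1 / N[action]) * (reward - Q[action])  # Incremental sample average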
import random

class WeightedAverage(KBanditSolution):
    def solve(self, exploration_rate, step_size, initial_value):
        Q = {i: initial_value for i in range(k)}  # 1. Value function, optionally optimistic
        N = {i: 0 for i in range(k)}  # 2. Number of actions, for update rule
        for i in range(self.steps):  # 3. Main loop
            explore = random.uniform(0, 1) < exploration_rate  # 4. Exploration decision
            if explore:
                action = random.randint(0, k - 1)  # 5. Exploration: choosing a random action
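            # Sketch (assumption, not the original gist): exploit otherwise, then update
            # with a constant step size so recent rewards are weighted more heavily.
            # self.take_action(action) is assumed from the KBanditSolution base class.
            else:
                action = max(Q, key=Q.get)  # 6. Exploitation: greedy action
            reward = self.take_action(action)
            N[action] += 1
            Q[action] += step_size * (reward - Q[action])  # 7. Constant step-size update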