Last active
February 26, 2020 03:01
-
-
Save msaroufim/7ab9d302d5a698ec79557263a3054774 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#https://www.geeksforgeeks.org/q-learning-in-python/ | |
def createPolicy(Q, epsilon, num_actions):
    """Build an epsilon-greedy policy from an action-value table.

    Args:
        Q: mapping from state to a length-`num_actions` array of action values.
        epsilon: probability mass reserved for uniform exploration.
        num_actions: size of the (discrete) action space.

    Returns:
        A function state -> probability vector over the `num_actions` actions.
    """
    def policyFunction(state):
        # The policy could be a neural net instead of a table lookup —
        # useful when the state space is too large (e.g. continuous domains).
        explore_mass = epsilon / num_actions
        probs = np.full(num_actions, explore_mass, dtype=float)
        greedy_action = np.argmax(Q[state])
        # Remaining (1 - epsilon) probability mass goes to the greedy action.
        probs[greedy_action] += 1.0 - epsilon
        return probs
    return policyFunction
def qLearning(env, num_episodes, discount_factor = 1.0, alpha = 0.6, epsilon = 0.1):
    """Off-policy TD control: learn the optimal action-value function Q.

    Follows an epsilon-greedy behavior policy while updating Q toward the
    greedy (max) target — standard Q-learning.

    Args:
        env: gym-style environment with `reset()`, `step(action)` and a
            discrete `action_space.n`.
        num_episodes: number of episodes to run.
        discount_factor: gamma applied to the bootstrapped next-state value.
        alpha: TD learning rate.
        epsilon: exploration rate of the behavior policy.

    Returns:
        (Q, stats): the learned state -> action-value table, and per-episode
        length/reward statistics.
    """
    # defaultdict lazily creates a zero value-vector for unseen states,
    # so no explicit state enumeration is needed.
    Q = defaultdict(lambda: np.zeros(env.action_space.n))

    stats = plotting.EpisodeStats(
        episode_lengths = np.zeros(num_episodes),
        episode_rewards = np.zeros(num_episodes))

    policy = createPolicy(Q, epsilon, env.action_space.n)

    for ith_episode in range(num_episodes):
        # Instead of reset() can do a domain randomization here
        # need to also then log environment config
        state = env.reset()
        for t in itertools.count():
            # epsilon-greedy: sample an action from the behavior policy.
            action_probs = policy(state)
            action = np.random.choice(np.arange(len(action_probs)), p = action_probs)
            next_state, reward, done, _ = env.step(action)
            # BUG FIX: original indexed stats with undefined name `i_episode`
            # (NameError on first step); the loop variable is `ith_episode`.
            stats.episode_rewards[ith_episode] += reward
            stats.episode_lengths[ith_episode] = t
            # TD update toward the greedy target (off-policy: uses max over
            # next-state actions, regardless of the action actually taken).
            best_next_action = np.argmax(Q[next_state])
            td_target = reward + discount_factor * Q[next_state][best_next_action]
            td_delta = td_target - Q[state][action]
            Q[state][action] += alpha * td_delta
            if done:
                break
            state = next_state
    return Q, stats
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment