Skip to content

Instantly share code, notes, and snippets.

Last active December 27, 2020 10:04
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save alirezamika/68a936b64f45b33bf2615ce35742f64f to your computer and use it in GitHub Desktop.
from collections import defaultdict
class Q:
    """Tabular Q-learning value table.

    Action values are kept in a nested mapping ``values[state][action]``
    whose entries default to ``0.0`` the first time they are read.
    States and actions are used as dictionary keys, so they must be hashable.
    """

    def __init__(self, alpha=0.5, discount=0.5):
        """Create an empty table.

        Args:
            alpha: Learning rate; weight given to each new temporal-difference
                error when an entry is updated.
            discount: Factor applied to the best value of the next state
                when forming the update target.
        """
        # Stored separately: the learning rate and the discount factor play
        # different roles in the update rule and must not overwrite each other.
        self.alpha = alpha
        self.discount = discount
        self.values = defaultdict(lambda: defaultdict(lambda: 0.0))

    def update(self, state, action, next_state, reward):
        """Apply one Q-learning step for an observed transition.

        Computes
        ``Q(s, a) += alpha * (reward + discount * max_a' Q(s', a') - Q(s, a))``
        and stores the result in ``values[state][action]``.

        Args:
            state: State in which ``action`` was taken.
            action: Action that was taken.
            next_state: State reached after taking ``action``.
            reward: Reward received for the transition.
        """
        current = self.values[state][action]
        # A next state with no recorded actions contributes zero future value.
        best_next = max(self.values[next_state].values(), default=0.0)
        target = reward + self.discount * best_next
        self.values[state][action] = current + self.alpha * (target - current)

    def get_best_action(self, state):
        """Return the highest-valued recorded action for ``state``.

        Returns:
            The action with the largest stored value, or ``None`` when no
            action has been recorded for ``state`` yet. Ties resolve to the
            action that was recorded first.
        """
        actions = self.values[state]
        if not actions:
            return None
        return max(actions, key=actions.get)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment