@Madhivarman
Created May 10, 2018 18:35
Using reinforcement learning (a Q-table algorithm), the agent learns to make 5 within three attempts. The input number ranges from 1 to 12.
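The agent keeps a Q-table mapping each state (the current number) to six action values and learns with the one-step update Q(s,a) = r + gamma * max over a' of Q(s',a'), where gamma is the discount factor (the 0.9 in the config below is an assumed value, as the snippet does not fix one).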
import random
import numpy as np

#mapping actions (0,1,2,3,4,5) to answers (3,2,1,-3,-2,-1)
actionMap = [3, 2, 1, -3, -2, -1]
class Game:

    def __init__(self):
        self.reset()

    def reset(self):
        #start from a random number between 1 and 12, but never 5 itself
        self.current_number = random.randint(1, 12)
        if self.current_number == 5:
            self.reset()
        self.turns = 0

    def has_won(self):
        #the agent wins if it reaches 5 within three attempts
        return self.current_number == 5 and self.turns <= 3

    def has_lost(self):
        #the agent loses once all three attempts are used without reaching 5
        return self.current_number != 5 and self.turns >= 3

    def is_active(self):
        return not self.has_lost() and not self.has_won()

    def play_rules(self, action):
        if self.turns >= 3:
            raise Exception("Maximum tries reached... let's play another game!")
        self.turns += 1
        self.current_number += int(action)
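#worked example of the rules above (illustrative): starting from 8, playing -3
#makes current_number 5 on turn 1, so has_won() is True and the game ends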
class AgentConfig:

    def __init__(self):
        self.nb_epoch = None
        self.print_every_n_epoch = 1
        #discount factor for the Q-value update (0.9 is an assumed value)
        self.discount_factor = 0.9
class TrainStats:
    #initially all training stats parameters are zero

    def __init__(self):
        self.epoch = 0
        self.nb_wins = 0
        self.nb_lost = 0
        self.p_wins = 0
        self.p_losses = 0
class Agent:

    def __init__(self, config, game):
        self.qtable = {}
        self.config = config  #holds nb_epoch and discount_factor
        self.game = game      #the game instance the agent plays against
        self.randomness_rate = 0
    #print the running win/loss percentages
    def print_epoch_status(self, stats):
        print("Epoch:{epoch} Wins:{win:.1f}% Loss:{loss:.1f}%".format(
            epoch=stats.epoch, win=stats.p_wins, loss=stats.p_losses))
    #initially fill qtable entry for a state with zeros
    def ensure_qtable_entry(self, state):
        if state not in self.qtable:
            self.qtable[state] = np.zeros(6)

    #explore with a random action, otherwise exploit the best known one
    def get_action(self, state):
        if not self.should_go_random() and state in self.qtable:
            return self.predict_action(state)
        return self.get_random_action()

    def should_go_random(self):
        return np.random.rand() <= self.randomness_rate

    def get_random_action(self):
        return random.randrange(0, 6)

    def predict_action(self, state):
        return np.argmax(self.qtable[state])
    #map the action index to the number actually added (see actionMap above)
    def action_to_answer(self, action):
        return actionMap[action]
    #train the agent: update the qtable with the one-step Q-learning target
    def train(self, state, action, reward, next_state, final):
        self.ensure_qtable_entry(state)
        self.ensure_qtable_entry(next_state)
        if final:
            #terminal step: the target is just the reward
            q_value = reward
        else:
            #otherwise: reward plus discounted best value of the next state
            next_state_actions = self.qtable[next_state]
            next_state_max = np.amax(next_state_actions)
            q_value = reward + self.config.discount_factor * next_state_max
        self.qtable[state][action] = q_value
    #reward: +1 for a win, -1 for a loss, a small penalty per extra step
    def get_reward(self):
        if self.game.has_won():
            return 1
        elif self.game.has_lost():
            return -1
        else:
            return -0.1
    def play_and_train(self):
        stats = TrainStats()
        for epoch in range(1, self.config.nb_epoch + 1):
            self.game.reset()  #reset the game
            stats.epoch = epoch
            while self.game.is_active():
                state = self.game.current_number
                action = self.get_action(state)
                human_readable_answer = self.action_to_answer(action)
                self.game.play_rules(human_readable_answer)
                reward = self.get_reward()
                next_state = self.game.current_number
                final = not self.game.is_active()
                self.train(state, action, reward, next_state, final)
            if self.game.has_won():
                stats.nb_wins += 1
            if self.game.has_lost():
                stats.nb_lost += 1
            stats.p_wins = 100.0 * stats.nb_wins / epoch
            stats.p_losses = 100.0 * stats.nb_lost / epoch
            if epoch % self.config.print_every_n_epoch == 0:
                self.print_epoch_status(stats)
#global declaration and main program starts from here
game = Game()
config = AgentConfig()
config.nb_epoch = 100
agent = Agent(config, game)
#randomness_rate 0 means no epsilon-greedy exploration; unseen states still act randomly
agent.randomness_rate = 0
agent.play_and_train()

#evaluate the trained model
config.nb_epoch = 1000
agent.play_and_train()
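After training, the learned greedy policy can be read directly from the Q-table. A minimal inspection sketch, assuming the agent and actionMap defined above (this loop is not part of the original gist):

#print the best learned move for every state encountered during training
for state in sorted(agent.qtable):
    best_action = int(np.argmax(agent.qtable[state]))
    print("state {s}: add {a}".format(s=state, a=actionMap[best_action]))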