# Madhivarman/learntomake5.py

## learntomake5.py
import random
import numpy as np

class Game:
	"""Number game: steer a random starting number to 5 within three moves.

	Attributes:
		current_number: the running total the player is trying to turn into 5.
		turns: number of moves played since the last reset.
	"""

	TARGET = 5
	MAX_TURNS = 3

	def __init__(self):
		self.reset()

	def reset(self):
		"""Start a new round: zero turns and a random start in 1..11, never 5."""
		# Redraw in a loop instead of recursing; a start equal to the target
		# would mean the round is already won before any move is made.
		self.current_number = random.randrange(1, 12)
		while self.current_number == self.TARGET:
			self.current_number = random.randrange(1, 12)

		self.turns = 0

	def has_won(self):
		"""Return True when the target has been reached within the turn budget."""
		return self.current_number == self.TARGET and self.turns <= self.MAX_TURNS

	def has_lost(self):
		"""Return True when every turn is used up and the target was missed."""
		# Must be ">=": with "<=" a freshly reset game counted as lost, so
		# is_active() was False from the start and no round was ever played.
		return self.current_number != self.TARGET and self.turns >= self.MAX_TURNS

	def is_active(self):
		"""Return True while the round is neither won nor lost."""
		return not self.has_lost() and not self.has_won()

	def play_rules(self, action):
		"""Play one move by adding `action` to the current number.

		Args:
			action: amount to add; converted with int().

		Raises:
			Exception: if all turns of this round have already been played.
		"""
		if self.turns >= self.MAX_TURNS:
			raise Exception("Maximum Try has Reached... Lets play another game..!")

		self.turns += 1
		self.current_number += int(action)


class AgentConfig:
	"""Settings read by the agent while it trains.

	Attributes:
		nb_epoch: number of games played per play_and_train() call; None until
			the caller sets it.
		print_every_n_epoch: a status line is printed when the epoch number is
			a multiple of this value.
		discount_factor: weight applied to the best next-state value in the
			agent's Q-value update.
	"""

	def __init__(self, nb_epoch=None, print_every_n_epoch=1, discount_factor=0.95):
		self.nb_epoch = nb_epoch
		self.print_every_n_epoch = print_every_n_epoch
		# Agent.train reads config.discount_factor, so it has to exist here.
		self.discount_factor = discount_factor


class TrainStats:
	"""Counters for a training run: epoch, win/loss counts and win/loss percentages."""

	def __init__(self):
		# Every training status value starts at zero; the chained assignment
		# binds the attributes left to right.
		self.epoch = self.nb_wins = self.nb_lost = self.p_wins = self.p_losses = 0


class Agent:
	"""Tabular Q-learning player for the make-5 game.

	The Q-table maps a game state (the current number) to an array holding one
	value per action index. Action indices are translated into the number that
	is added to the game through ACTION_TO_ANSWER.
	"""

	# Answer added to the game's number for each action index 0..5.
	ACTION_TO_ANSWER = (3, 2, 1, -3, -2, -1)
	# Fallback used when the config object does not define discount_factor.
	DEFAULT_DISCOUNT_FACTOR = 0.95

	def __init__(self, number_epochs, game=None):
		"""Create an untrained agent.

		Args:
			number_epochs: configuration object (historical parameter name). It
				must expose nb_epoch and print_every_n_epoch and may expose
				discount_factor.
			game: game instance to play on; a new Game() is created when omitted.
		"""
		self.config = number_epochs
		self.game = Game() if game is None else game
		self.qtable = {}
		self.epochs = number_epochs.nb_epoch #get number of epochs
		self.randomness_rate = 0

	#print the result
	def print_epoch_status(self, stats):
		"""Print the epoch number and the running win/loss percentages."""
		print("Epochs {epoch} Wins:{win}%  Loss:{loss}".format(
			epoch=stats.epoch, win=stats.p_wins, loss=stats.p_losses))

	#initially fill qtable with zeros
	def ensure_qtable_entry(self, state):
		"""Give `state` an all-zero row (one slot per action) if it has none."""
		if state not in self.qtable:
			self.qtable[state] = np.zeros(len(self.ACTION_TO_ANSWER))

	def get_action(self, state):
		"""Return an action index for `state`.

		The best known action is used when the state is already in the Q-table
		and the random draw does not ask for exploration; otherwise a random
		action is returned.
		"""
		if not self.should_go_random() and state in self.qtable:
			return self.predict_action(state)

		return self.get_random_action()

	def should_go_random(self):
		"""Return True with probability randomness_rate."""
		return np.random.rand() <= self.randomness_rate

	def get_random_action(self):
		"""Return a uniformly random action index."""
		return random.randrange(0, len(self.ACTION_TO_ANSWER))

	def predict_action(self, state):
		"""Return the action index with the highest Q-value for `state`."""
		return int(np.argmax(self.qtable[state]))

	# mapping actions (0,1,2,3,4,5) to answers (3,2,1,-3,-2,-1)
	def action_to_answer(self, action):
		"""Translate an action index into the number added to the game."""
		return self.ACTION_TO_ANSWER[action]

	#train the agent
	def train(self, state, action, reward, next_state, final):
		"""Store the Q-value for taking `action` in `state`.

		Args:
			state: game number before the move.
			action: action index that was played.
			reward: reward received for the move.
			next_state: game number after the move.
			final: True when the move ended the round; the stored value is then
				the reward alone.
		"""
		self.ensure_qtable_entry(state)
		self.ensure_qtable_entry(next_state)

		if final:
			q_value = reward
		else:
			discount = getattr(
				self.config, "discount_factor", self.DEFAULT_DISCOUNT_FACTOR)
			q_value = reward + discount * np.amax(self.qtable[next_state])

		self.qtable[state][action] = q_value

	def get_reward(self):
		"""Return 1 for a won game, -1 for a lost game and -0.1 otherwise."""
		if self.game.has_won():
			return 1

		if self.game.has_lost():
			return -1

		return -0.1

	def play_and_train(self):
		"""Play config.nb_epoch games, updating the Q-table after every move.

		Returns:
			The TrainStats object holding the counts and percentages of the run.
		"""
		stats = TrainStats()
		game = self.game
		config = self.config

		for epoch in range(1, config.nb_epoch + 1):

			game.reset() #reset the game
			stats.epoch = epoch

			while game.is_active():

				state = game.current_number

				action = self.get_action(state)
				human_readable_answer = self.action_to_answer(action)

				game.play_rules(human_readable_answer)

				reward = self.get_reward()
				next_state = game.current_number
				final = not game.is_active()

				self.train(state, action, reward, next_state, final)

			# The round is over here, so it is counted exactly once.
			if game.has_won():
				stats.nb_wins += 1
			elif game.has_lost():
				stats.nb_lost += 1

			stats.p_wins = 100.0 * stats.nb_wins / epoch
			stats.p_losses = 100.0 * stats.nb_lost / epoch

			# Reported inside the loop so print_every_n_epoch is honoured.
			if epoch % config.print_every_n_epoch == 0:
				self.print_epoch_status(stats)

		return stats


#global declaration and main program starts from here
# One game and one configuration object are created for the whole run.
game = Game()
config = AgentConfig()
# First pass: 100 games.
config.nb_epoch = 100
agent = Agent(config)
# A rate of 0 disables forced exploration; get_action still falls back to a
# random move for states that are not in the Q-table yet.
agent.randomness_rate = 0
agent.play_and_train()

#evaluate the trained model
# The same agent (and its Q-table) is reused for 1000 more games. nb_epoch is
# read from the config when play_and_train runs, and play_and_train still
# calls train on every move, so this pass keeps learning while it is scored.
config.nb_epoch = 1000
agent.play_and_train()
	import random
	import numpy as np

	class Game:
		"""Number game: steer a random starting number to 5 within three moves.

		Attributes:
			current_number: the running total the player is trying to turn into 5.
			turns: number of moves played since the last reset.
		"""

		TARGET = 5
		MAX_TURNS = 3

		def __init__(self):
			self.reset()

		def reset(self):
			"""Start a new round: zero turns and a random start in 1..11, never 5."""
			# Redraw in a loop instead of recursing; a start equal to the target
			# would mean the round is already won before any move is made.
			self.current_number = random.randrange(1, 12)
			while self.current_number == self.TARGET:
				self.current_number = random.randrange(1, 12)

			self.turns = 0

		def has_won(self):
			"""Return True when the target has been reached within the turn budget."""
			return self.current_number == self.TARGET and self.turns <= self.MAX_TURNS

		def has_lost(self):
			"""Return True when every turn is used up and the target was missed."""
			# Must be ">=": with "<=" a freshly reset game counted as lost, so
			# is_active() was False from the start and no round was ever played.
			return self.current_number != self.TARGET and self.turns >= self.MAX_TURNS

		def is_active(self):
			"""Return True while the round is neither won nor lost."""
			return not self.has_lost() and not self.has_won()

		def play_rules(self, action):
			"""Play one move by adding `action` to the current number.

			Args:
				action: amount to add; converted with int().

			Raises:
				Exception: if all turns of this round have already been played.
			"""
			if self.turns >= self.MAX_TURNS:
				raise Exception("Maximum Try has Reached... Lets play another game..!")

			self.turns += 1
			self.current_number += int(action)



	class AgentConfig:
		"""Settings read by the agent while it trains.

		Attributes:
			nb_epoch: number of games played per play_and_train() call; None until
				the caller sets it.
			print_every_n_epoch: a status line is printed when the epoch number is
				a multiple of this value.
			discount_factor: weight applied to the best next-state value in the
				agent's Q-value update.
		"""

		def __init__(self, nb_epoch=None, print_every_n_epoch=1, discount_factor=0.95):
			self.nb_epoch = nb_epoch
			self.print_every_n_epoch = print_every_n_epoch
			# Agent.train reads config.discount_factor, so it has to exist here.
			self.discount_factor = discount_factor


	class TrainStats:
		"""Counters for a training run: epoch, win/loss counts and win/loss percentages."""

		#initially all training status parameters are zero
		def __init__(self):
			self.epoch = 0
			self.nb_wins = 0
			self.nb_lost = 0
			self.p_wins = 0
			self.p_losses = 0


	class Agent:
		"""Tabular Q-learning player for the make-5 game.

		The Q-table maps a game state (the current number) to an array holding one
		value per action index. Action indices are translated into the number that
		is added to the game through ACTION_TO_ANSWER.
		"""

		# Answer added to the game's number for each action index 0..5.
		ACTION_TO_ANSWER = (3, 2, 1, -3, -2, -1)
		# Fallback used when the config object does not define discount_factor.
		DEFAULT_DISCOUNT_FACTOR = 0.95

		def __init__(self, number_epochs, game=None):
			"""Create an untrained agent.

			Args:
				number_epochs: configuration object (historical parameter name). It
					must expose nb_epoch and print_every_n_epoch and may expose
					discount_factor.
				game: game instance to play on; a new Game() is created when omitted.
			"""
			self.config = number_epochs
			self.game = Game() if game is None else game
			self.qtable = {}
			self.epochs = number_epochs.nb_epoch #get number of epochs
			self.randomness_rate = 0

		#print the result
		def print_epoch_status(self, stats):
			"""Print the epoch number and the running win/loss percentages."""
			print("Epochs {epoch} Wins:{win}% Loss:{loss}".format(
				epoch=stats.epoch, win=stats.p_wins, loss=stats.p_losses))

		#initially fill qtable with zeros
		def ensure_qtable_entry(self, state):
			"""Give `state` an all-zero row (one slot per action) if it has none."""
			if state not in self.qtable:
				self.qtable[state] = np.zeros(len(self.ACTION_TO_ANSWER))

		def get_action(self, state):
			"""Return an action index for `state`.

			The best known action is used when the state is already in the Q-table
			and the random draw does not ask for exploration; otherwise a random
			action is returned.
			"""
			if not self.should_go_random() and state in self.qtable:
				return self.predict_action(state)

			return self.get_random_action()

		def should_go_random(self):
			"""Return True with probability randomness_rate."""
			return np.random.rand() <= self.randomness_rate

		def get_random_action(self):
			"""Return a uniformly random action index."""
			return random.randrange(0, len(self.ACTION_TO_ANSWER))

		def predict_action(self, state):
			"""Return the action index with the highest Q-value for `state`."""
			return int(np.argmax(self.qtable[state]))

		# mapping actions (0,1,2,3,4,5) to answers (3,2,1,-3,-2,-1)
		def action_to_answer(self, action):
			"""Translate an action index into the number added to the game."""
			return self.ACTION_TO_ANSWER[action]

		#train the agent
		def train(self, state, action, reward, next_state, final):
			"""Store the Q-value for taking `action` in `state`.

			Args:
				state: game number before the move.
				action: action index that was played.
				reward: reward received for the move.
				next_state: game number after the move.
				final: True when the move ended the round; the stored value is then
					the reward alone.
			"""
			self.ensure_qtable_entry(state)
			self.ensure_qtable_entry(next_state)

			if final:
				q_value = reward
			else:
				discount = getattr(
					self.config, "discount_factor", self.DEFAULT_DISCOUNT_FACTOR)
				q_value = reward + discount * np.amax(self.qtable[next_state])

			self.qtable[state][action] = q_value

		def get_reward(self):
			"""Return 1 for a won game, -1 for a lost game and -0.1 otherwise."""
			if self.game.has_won():
				return 1

			if self.game.has_lost():
				return -1

			return -0.1

		def play_and_train(self):
			"""Play config.nb_epoch games, updating the Q-table after every move.

			Returns:
				The TrainStats object holding the counts and percentages of the run.
			"""
			stats = TrainStats()
			game = self.game
			config = self.config

			for epoch in range(1, config.nb_epoch + 1):

				game.reset() #reset the game
				stats.epoch = epoch

				while game.is_active():

					state = game.current_number

					action = self.get_action(state)
					human_readable_answer = self.action_to_answer(action)

					game.play_rules(human_readable_answer)

					reward = self.get_reward()
					next_state = game.current_number
					final = not game.is_active()

					self.train(state, action, reward, next_state, final)

				# The round is over here, so it is counted exactly once.
				if game.has_won():
					stats.nb_wins += 1
				elif game.has_lost():
					stats.nb_lost += 1

				stats.p_wins = 100.0 * stats.nb_wins / epoch
				stats.p_losses = 100.0 * stats.nb_lost / epoch

				# Reported inside the loop so print_every_n_epoch is honoured.
				if epoch % config.print_every_n_epoch == 0:
					self.print_epoch_status(stats)

			return stats


	#global declaration and main program starts from here
	# One game and one configuration object are created for the whole run.
	game = Game()
	config = AgentConfig()
	# First pass: 100 games.
	config.nb_epoch = 100
	agent = Agent(config)
	# A rate of 0 disables forced exploration; get_action still falls back to a
	# random move for states that are not in the Q-table yet.
	agent.randomness_rate = 0
	agent.play_and_train()

	#evaluate the trained model
	# The same agent (and its Q-table) is reused for 1000 more games. nb_epoch is
	# read from the config when play_and_train runs, and play_and_train still
	# calls train on every move, so this pass keeps learning while it is scored.
	config.nb_epoch = 1000
	agent.play_and_train()