@Danaze
Created August 25, 2020 16:27
def QLtrain(self):
    """Train the agent with tabular Q-learning and return the per-episode cumulative rewards."""
    cum_reward = np.zeros(self.num_episodes)
    for ep in range(self.num_episodes):
        current_state = self.discretize_state(self.env.reset())
        done = False
        while not done:
            # choose an action according to the exploration-exploitation policy
            action = self.choose_action(current_state)
            obs, reward, done, _ = self.env.step(action)
            cum_reward[ep] += reward
            new_state = self.discretize_state(obs)
            # apply the Q-learning update for the observed transition
            self.QLupdate(current_state, action, reward, new_state)
            current_state = new_state
        # decay the exploration rate and learning rate after each episode
        self.getEpsilon()
        self.getLR()
    print('QL based training is finished!')
    return cum_reward
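
The loop above relies on helpers defined elsewhere on the class (discretize_state, choose_action, QLupdate, getEpsilon, getLR), on numpy imported as np, and on the pre-0.26 Gym step API that returns a four-tuple. As a point of reference, here is a minimal sketch of what the QLupdate helper might look like; the attribute names self.Q (the Q-table), self.lr (learning rate), and self.gamma (discount factor) are assumptions and do not appear in the original snippet.

def QLupdate(self, state, action, reward, new_state):
    # Sketch of a one-step tabular Q-learning update (attribute names assumed):
    # Q(s, a) <- Q(s, a) + lr * (reward + gamma * max_a' Q(s', a') - Q(s, a))
    best_next = np.max(self.Q[new_state])
    td_target = reward + self.gamma * best_next
    td_error = td_target - self.Q[state][action]
    self.Q[state][action] += self.lr * td_error

With those pieces in place, training would be launched with something like returns = agent.QLtrain(), where agent is an instance of the class this method belongs to.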