# Danaze/updating.py

## updating.py
    def QLupdate(self, state, action, reward, new_state):
        """Update the Q-value of the visited (state, action) pair in place.

        The entry ``Q_table[state][action]`` is moved toward the target
        ``reward + discount * max(Q_table[new_state])`` by a fraction
        ``learning_rate`` of the difference between target and current value.

        Args:
            state: Index/key of the state the action was taken in.
            action: Index/key of the action that was taken.
            reward: Reward received for the transition.
            new_state: Index/key of the state reached after the action.
        """
        visited_row = self.Q_table[state]
        # Bootstrap from the largest value stored for the successor state.
        best_next_value = np.max(self.Q_table[new_state])
        td_target = reward + self.discount * best_next_value
        td_error = td_target - visited_row[action]
        visited_row[action] += self.learning_rate * td_error
    def SARSAupdate(self, state, action, reward, new_state, next_action):
        """Update the Q-value of the visited (state, action) pair in place.

        The entry ``Q_table[state][action]`` is moved toward the target
        ``reward + discount * Q_table[new_state][next_action]`` by a fraction
        ``learning_rate`` of the difference between target and current value.

        Args:
            state: Index/key of the state the action was taken in.
            action: Index/key of the action that was taken.
            reward: Reward received for the transition.
            new_state: Index/key of the state reached after the action.
            next_action: Index/key of the action whose value in ``new_state``
                is used for bootstrapping.
        """
        visited_row = self.Q_table[state]
        # Bootstrap from the value of the supplied follow-up action.
        next_value = self.Q_table[new_state][next_action]
        td_target = reward + self.discount * next_value
        td_error = td_target - visited_row[action]
        visited_row[action] += self.learning_rate * td_error
	def QLupdate(self, state, action, reward, new_state):
		"""Update the Q-value of the visited (state, action) pair in place.

		``Q_table[state][action]`` is moved toward
		``reward + discount * max(Q_table[new_state])`` by a fraction
		``learning_rate`` of the gap between that target and the current value.

		Args:
			state: Index/key of the state the action was taken in.
			action: Index/key of the action that was taken.
			reward: Reward received for the transition.
			new_state: Index/key of the state reached after the action.
		"""
		# The body must be indented below the ``def``; at the same level as
		# the ``def`` the function has no suite and the file does not parse.
		self.Q_table[state][action] += self.learning_rate * (
			reward
			+ self.discount * np.max(self.Q_table[new_state])
			- self.Q_table[state][action]
		)
	def SARSAupdate(self, state, action, reward, new_state, next_action):
		"""Update the Q-value of the visited (state, action) pair in place.

		``Q_table[state][action]`` is moved toward
		``reward + discount * Q_table[new_state][next_action]`` by a fraction
		``learning_rate`` of the gap between that target and the current value.

		Args:
			state: Index/key of the state the action was taken in.
			action: Index/key of the action that was taken.
			reward: Reward received for the transition.
			new_state: Index/key of the state reached after the action.
			next_action: Index/key of the action whose value in ``new_state``
				is used for bootstrapping.
		"""
		# The body must be indented below the ``def``; at the same level as
		# the ``def`` the function has no suite and the file does not parse.
		self.Q_table[state][action] += self.learning_rate * (
			reward
			+ self.discount * self.Q_table[new_state][next_action]
			- self.Q_table[state][action]
		)