# eerkaijun/update_deep_q_learning.py

## update_deep_q_learning.py
# agent taking a step at each time step
def agent_step(self, reward, state):
    """Pick the action for the current step and train on the previous transition.

    The model is fitted so that its estimate for the previously taken action
    (``self.prev_action`` in ``self.prev_state``) moves towards
    ``reward + self.discount * value_of_the_action_chosen_now``.

    Args:
        reward: Reward (r_t) obtained from the previous step.
        state: State (s_t+1) for the current step, in whatever form
            ``self.model.predict`` accepts.

    Returns:
        The action selected by ``self.agent_take_action`` for the current step.
    """
    # Action values of the current time step; [0] takes the first row of the
    # prediction (assumes a single-sample batch -- TODO confirm with caller).
    act_values = self.model.predict(state)[0]
    action = self.agent_take_action(act_values)

    # Target for the previous step's action, bootstrapped from the value of
    # the action just chosen.
    # NOTE(review): this uses act_values[action], not the maximum action value;
    # the two only coincide if agent_take_action is greedy -- confirm intent.
    target = reward + self.discount * act_values[action]

    # Start from the model's current estimates for the previous state so that
    # only the entry of the action actually taken is changed.
    target_f = self.model.predict(self.prev_state)
    target_f[0][self.prev_action] = target

    self.model.fit(self.prev_state, target_f)

    # Advance the bookkeeping so the next call trains on this transition
    # instead of re-using the stale previous state/action.
    self.prev_state = state
    self.prev_action = action
    return action