pragatibaheti/qmatrix.py

## qmatrix.py
# Bellman equation: the Q-value for (current_state, action) is the immediate
# reward R[current_state, action] plus gamma times the maximum future reward.
Q[current_state, action] = R[current_state, action] + gamma * max_value
  print('max_value', R[current_state, action] + gamma * max_value)

  if (np.max(Q) > 0):
    return(np.sum(Q/np.max(Q)*100))
  else:
    return (0)
	# Bellman equation: the Q-value for (current_state, action) is the immediate
	# reward R[current_state, action] plus gamma times the maximum future reward.
	Q[current_state, action] = R[current_state, action] + gamma * max_value
	print('max_value', R[current_state, action] + gamma * max_value)

	if (np.max(Q) > 0):
	return(np.sum(Q/np.max(Q)*100))
	else:
	return (0)