robsannaa/Q.py

## Q.py
# Q-learning training loop.
#
# Each update samples a random state, picks a random playable action from it,
# and moves Q[state, action] toward the temporal-difference (TD) target.
# R, Q, gamma and alpha are expected to be defined before this point (they are
# not defined in this section of the file).
#
# NOTE(review): the original text contained this loop twice; the second copy
# had its indentation flattened behind a single tab and could not be parsed.
# Both passes of 1000 updates are kept here as properly nested code. Confirm
# whether 2000 total updates are intended or the second copy was a paste
# artifact (in which case change range(2) to range(1)).
for _pass in range(2):
    for _ in range(1000):
        # Pick a random starting state among the 12 states (upper bound is exclusive).
        current_state = np.random.randint(0, 12)

        # An action j is playable from current_state when its reward is positive.
        playable_actions = [j for j in range(12) if R[current_state, j] > 0]
        if not playable_actions:
            # np.random.choice raises ValueError on an empty sequence; a state
            # with no positive-reward action has nothing to learn from, so skip.
            continue

        # The chosen action index is also used as the index of the state reached.
        next_state = np.random.choice(playable_actions)

        # Greedy value of the state we land in.
        best_next_action = np.argmax(Q[next_state])

        # Temporal difference: reward + discounted best future value - current
        # estimate. The parentheses are required: without them Python ends the
        # assignment at the first line and the trailing "- Q[...]" line becomes
        # a separate no-op expression, so the current estimate is never
        # subtracted.
        TD = (
            R[current_state, next_state]
            + gamma * Q[next_state, best_next_action]
            - Q[current_state, next_state]
        )

        # Move the current estimate toward the target by step size alpha.
        Q[current_state, next_state] += alpha * TD