Skip to content

Instantly share code, notes, and snippets.

@robsannaa
Created November 29, 2019 12:40
Show Gist options
  • Save robsannaa/c53bbbfcf936d70ae1e98a62e0a9a77b to your computer and use it in GitHub Desktop.
Save robsannaa/c53bbbfcf936d70ae1e98a62e0a9a77b to your computer and use it in GitHub Desktop.
def route(starting_location, ending_location, n_iterations=1000):
    """Return the learned path from *starting_location* to *ending_location*.

    A Q-table is trained with randomly sampled temporal-difference updates
    on a copy of the reward matrix in which the goal state rewards itself,
    then the greedy (arg-max) policy is followed from the start until the
    goal location is reached.

    Reads the names ``R``, ``gamma``, ``alpha``, ``location_to_state`` and
    ``state_to_location`` from the enclosing scope.
    NOTE(review): these are presumably module-level definitions (square
    reward matrix, discount factor, learning rate, and the two location/
    state lookup tables) -- confirm against the rest of the file.

    Args:
        starting_location: Key of ``location_to_state`` to start from.
        ending_location: Key of ``location_to_state`` to reach.
        n_iterations: Number of random Q-learning updates to perform.

    Returns:
        List of locations, beginning with ``starting_location`` and ending
        with ``ending_location``.

    Raises:
        ValueError: If following the learned policy revisits a state, i.e.
            the goal cannot be reached (previously this looped forever).
    """
    rewards = np.copy(R)  # never mutate the shared reward matrix
    n_states = rewards.shape[0]
    ending_state = location_to_state[ending_location]
    # A large self-reward on the goal makes it the attractor of the policy.
    rewards[ending_state, ending_state] = 1000

    q_values = np.zeros((n_states, n_states))
    for _ in range(n_iterations):
        current_state = np.random.randint(0, n_states)
        playable_actions = np.flatnonzero(rewards[current_state] > 0)
        if playable_actions.size == 0:
            # No positive-reward transition out of this state: nothing to
            # learn here (np.random.choice would fail on an empty array).
            continue
        next_state = np.random.choice(playable_actions)
        temporal_difference = (
            rewards[current_state, next_state]
            + gamma * q_values[next_state].max()
            - q_values[current_state, next_state]
        )
        q_values[current_state, next_state] += alpha * temporal_difference

    path = [starting_location]
    current_location = starting_location
    expanded_states = set()
    while current_location != ending_location:
        state = location_to_state[current_location]
        if state in expanded_states:
            # The greedy policy is deterministic, so expanding the same
            # state twice means it would cycle without ever reaching the goal.
            raise ValueError(
                "no route found from %r to %r" % (path[0], ending_location)
            )
        expanded_states.add(state)
        current_location = state_to_location[np.argmax(q_values[state])]
        path.append(current_location)
    return path
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment