robsannaa/route.py

## route.py
def route(starting_location, ending_location):
    """Return a route between two locations learned with tabular Q-learning.

    A copy of the module-level reward matrix ``R`` is taken and the ending
    state's self-transition is set to a reward of 1000.  A Q-table is then
    trained with 1000 randomly sampled transitions using the module-level
    ``gamma`` and ``alpha`` in the update
    ``Q[s, a] += alpha * (R[s, a] + gamma * max(Q[a]) - Q[s, a])``, and the
    route is read off by repeatedly taking the highest-valued action.

    Args:
        starting_location: Key of ``location_to_state`` where the route starts.
        ending_location: Key of ``location_to_state`` where the route ends.

    Returns:
        list: Locations in visiting order, beginning with
        ``starting_location`` and ending with ``ending_location``.

    Raises:
        KeyError: If a location is missing from ``location_to_state``.
        ValueError: If the greedy walk comes back to a state it has already
            visited, meaning no route to ``ending_location`` was learned.
    """
    R_new = np.copy(R)
    ending_state = location_to_state[ending_location]
    # A large reward on the goal's self-transition makes it the attractor.
    R_new[ending_state, ending_state] = 1000

    # Size everything from the reward matrix instead of a hard-coded 12.
    n_states = R_new.shape[0]
    Q = np.zeros((n_states, n_states))

    for _ in range(1000):
        current_state = np.random.randint(0, n_states)
        playable_actions = np.flatnonzero(R_new[current_state] > 0)
        if playable_actions.size == 0:
            # A state with no outgoing transition has nothing to learn;
            # np.random.choice would raise on an empty candidate list.
            continue
        next_state = np.random.choice(playable_actions)
        TD = (
            R_new[current_state, next_state]
            + gamma * Q[next_state].max()
            - Q[current_state, next_state]
        )
        Q[current_state, next_state] += alpha * TD

    path = [starting_location]
    current_location = starting_location
    visited_states = set()
    while current_location != ending_location:
        current_state = location_to_state[current_location]
        if current_state in visited_states:
            # The read-out is deterministic, so a repeated state means the
            # walk would cycle forever without reaching the destination.
            raise ValueError(
                f"no route learned from {path[0]!r} to {ending_location!r}: "
                f"greedy walk revisits {current_location!r}"
            )
        visited_states.add(current_state)
        next_state = int(np.argmax(Q[current_state]))
        current_location = state_to_location[next_state]
        path.append(current_location)
    return path
	def route(starting_location, ending_location):
		"""Return a route between two locations learned with tabular Q-learning.

		A copy of the module-level reward matrix ``R`` is taken and the ending
		state's self-transition is set to a reward of 1000.  A Q-table is then
		trained with 1000 randomly sampled transitions using the module-level
		``gamma`` and ``alpha`` in the update
		``Q[s, a] += alpha * (R[s, a] + gamma * max(Q[a]) - Q[s, a])``, and the
		route is read off by repeatedly taking the highest-valued action.

		Args:
			starting_location: Key of ``location_to_state`` where the route starts.
			ending_location: Key of ``location_to_state`` where the route ends.

		Returns:
			list: Locations in visiting order, beginning with
			``starting_location`` and ending with ``ending_location``.

		Raises:
			KeyError: If a location is missing from ``location_to_state``.
			ValueError: If the greedy walk comes back to a state it has already
				visited, meaning no route to ``ending_location`` was learned.
		"""
		R_new = np.copy(R)
		ending_state = location_to_state[ending_location]
		# A large reward on the goal's self-transition makes it the attractor.
		R_new[ending_state, ending_state] = 1000

		# Size everything from the reward matrix instead of a hard-coded 12.
		n_states = R_new.shape[0]
		Q = np.zeros((n_states, n_states))

		for _ in range(1000):
			current_state = np.random.randint(0, n_states)
			playable_actions = np.flatnonzero(R_new[current_state] > 0)
			if playable_actions.size == 0:
				# A state with no outgoing transition has nothing to learn;
				# np.random.choice would raise on an empty candidate list.
				continue
			next_state = np.random.choice(playable_actions)
			TD = (
				R_new[current_state, next_state]
				+ gamma * Q[next_state].max()
				- Q[current_state, next_state]
			)
			Q[current_state, next_state] += alpha * TD

		path = [starting_location]
		current_location = starting_location
		visited_states = set()
		while current_location != ending_location:
			current_state = location_to_state[current_location]
			if current_state in visited_states:
				# The read-out is deterministic, so a repeated state means the
				# walk would cycle forever without reaching the destination.
				raise ValueError(
					f"no route learned from {path[0]!r} to {ending_location!r}: "
					f"greedy walk revisits {current_location!r}"
				)
			visited_states.add(current_state)
			next_state = int(np.argmax(Q[current_state]))
			current_location = state_to_location[next_state]
			path.append(current_location)
		return path