pragatibaheti/test.py

## test.py
#initial state from where the bot starts
current_state = 7
steps = [current_state]

while current_state != 3:

    next_step_index = np.where(Q[current_state,] == np.max(Q[current_state,]))[1]

    if next_step_index.shape[0] > 1:
        next_step_index = int(np.random.choice(next_step_index, size = 1))
    else:
        next_step_index = int(next_step_index)

    steps.append(next_step_index)
    current_state = next_step_index

print("Most efficient path:")
print(steps)
	#initial state from where the bot starts
	current_state = 7
	steps = [current_state]

	while current_state != 3:

	next_step_index = np.where(Q[current_state,] == np.max(Q[current_state,]))[1]

	if next_step_index.shape[0] > 1:
	next_step_index = int(np.random.choice(next_step_index, size = 1))
	else:
	next_step_index = int(next_step_index)

	steps.append(next_step_index)
	current_state = next_step_index

	print("Most efficient path:")
	print(steps)