Skip to content

Instantly share code, notes, and snippets.

@tocom242242
Last active August 31, 2019 07:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tocom242242/15c8cb2f64104528c969e5caf73bc7d2 to your computer and use it in GitHub Desktop.
Save tocom242242/15c8cb2f64104528c969e5caf73bc7d2 to your computer and use it in GitHub Desktop.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
def epsilon_greedy_selection(epsilon, values):
    """Select an action index with the epsilon-greedy rule.

    With probability ``epsilon`` a uniformly random index is returned
    (exploration); otherwise the index of the largest entry in ``values``
    is returned (exploitation).

    Args:
        epsilon: Exploration rate, compared against a uniform sample
            drawn from [0, 1).
        values: Non-empty sequence of value estimates, one per action.

    Returns:
        The selected action index as a plain ``int``.

    Raises:
        ValueError: If ``values`` is empty.
    """
    nb_values = len(values)
    # Explicit length check (not truthiness) so NumPy arrays work too.
    if nb_values == 0:
        raise ValueError("values must contain at least one action value")

    if np.random.uniform() < epsilon:
        # Explore: every action is equally likely.
        action = np.random.randint(0, nb_values)
    else:
        # Exploit: take the action with the highest value estimate.
        action = np.argmax(values)
    # Normalise to a built-in int so both branches return the same type.
    return int(action)
# Experiment configuration.
nb_steps = 1000  # number of action selections to sample
values = [100, 50, 10]  # value estimate for each action
epsilon = 0.1  # exploration rate (epsilon)

# Select an action nb_steps times and record the chosen indices.
results = [epsilon_greedy_selection(epsilon, values) for _ in range(nb_steps)]

# Plot a histogram of how often each action was selected.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
nb_actions = len(values)
action_labels = [chr(ord("A") + i) for i in range(nb_actions)]  # "A", "B", "C", ...
# One bin per action, centred on its integer index, so each bar lines up
# with its tick instead of depending on the default 10-bin layout.
ax.hist(results, bins=np.arange(nb_actions + 1) - 0.5, rwidth=0.8)
# Fix the tick positions before labelling them; labels set without fixed
# ticks are attached to whatever ticks the locator happens to choose.
ax.set_xticks(list(range(nb_actions)))
ax.set_xticklabels(action_labels)
# No bar can exceed the number of samples.
ax.set_ylim(0, nb_steps)
plt.savefig("result.jpg")
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment