Skip to content

Instantly share code, notes, and snippets.

@mitmul
Last active December 15, 2015 10:09
Show Gist options
  • Save mitmul/5243518 to your computer and use it in GitHub Desktop.
Save mitmul/5243518 to your computer and use it in GitHub Desktop.
require "gnuplot"
require "narray"
# Draws every series in +y+ against the shared +x+ values as a line plot.
#
# x - sequence of x-axis values (must respond to +size+).
# y - pairs of series title => sequence of y values. A series whose size
#     differs from x.size is silently skipped.
def draw_chart(x, y)
  Gnuplot.open do |gp|
    Gnuplot::Plot.new(gp) do |plot|
      y.each do |name, value|
        # Only series that line up with the x axis can be plotted.
        next unless x.size == value.size

        series = Gnuplot::DataSet.new([x, value]) do |ds|
          ds.with = "lines"
          ds.title = name
        end
        plot.data << series
      end
    end
  end
end
# Draws one sample from the standard normal distribution using the
# Box-Muller transform.
#
# Kernel#rand returns a Float in [0, 1), so the radius term is computed from
# 1.0 - rand (which lies in (0, 1]) to keep Math.log away from
# log(0) = -Infinity, which would make the sample non-finite.
#
# Returns a finite Float.
def nrand
  radius = Math.sqrt(-2 * Math.log(1.0 - rand))
  angle = 2 * Math::PI * rand
  radius * Math.cos(angle)
end
# Simulates the outcome of one action as a Bernoulli trial.
#
# rate - probability of success, expected in [0, 1].
#
# Kernel#rand is uniform on [0, 1), so a strict comparison makes the success
# probability exactly +rate+: a rate of 0 never succeeds and a rate of 1
# always does.
#
# Returns 0 on success and 1 on failure.
def result(rate)
  rand < rate ? 0 : 1
end
# Picks one of the two actions at random.
#
# a_rate - probability of picking action A, expected in [0, 1].
#
# Kernel#rand is uniform on [0, 1), so a strict comparison makes the
# probability of picking A exactly +a_rate+: with a_rate of 0 action A is
# never picked, with a_rate of 1 it always is.
#
# Returns 0 for action A and 1 for action B.
def choose(a_rate)
  rand < a_rate ? 0 : 1
end
# Simulates a two-action reward-penalty (RP) learning automaton.
#
# try   - number of trials to run.
# exp   - experiment index supplied by the caller; not used by the simulation.
# a     - success probability of action A (action 0).
# b     - success probability of action B (action 1).
# alpha - learning rate applied to every probability update.
#
# Actions are drawn with +choose+ and their outcomes with +result+, both
# defined elsewhere in this file.
#
# Returns an Array of +try+ Floats; element t is the fraction of the first
# t + 1 trials in which the optimal action was chosen.
def automaton_RP(try, exp, a, b, alpha)
  # The optimal action is the one with the higher success rate (A wins ties).
  correct = a >= b ? 0 : 1
  # Probability of choosing action A; B is chosen with the complement.
  a_choice_prob = 0.5
  # Number of trials so far in which the optimal action was chosen.
  correct_choice = 0
  # History of the optimal-choice ratio.
  path = []
  try.times do |t|
    choice = choose(a_choice_prob)
    correct_choice += 1 if choice == correct
    path << (correct_choice.to_f / (t + 1).to_f)
    # Outcome of the chosen action: 0 = success, 1 = failure.
    reward = result(choice == 0 ? a : b)
    # A succeeding (0, 0) or B failing (1, 1) is evidence for A, so the
    # probability moves toward 1; otherwise it moves toward 0. Stepping by a
    # fraction alpha of the remaining distance keeps the probability inside
    # [0, 1] for any alpha in [0, 1].
    target = choice == reward ? 1.0 : 0.0
    a_choice_prob += alpha * (target - a_choice_prob)
  end
  path
end
# Simulates a two-action reward-inaction (RI) learning automaton: the choice
# probability is only updated after a successful action and is left untouched
# after a failure.
#
# try   - number of trials to run.
# exp   - experiment index supplied by the caller; not used by the simulation.
# a     - success probability of action A (action 0).
# b     - success probability of action B (action 1).
# alpha - learning rate applied to each update.
#
# Actions are drawn with +choose+ and their outcomes with +result+, both
# defined elsewhere in this file.
#
# Returns an Array of +try+ Floats; element t is the fraction of the first
# t + 1 trials in which the optimal action was chosen.
def automaton_RI(try, exp, a, b, alpha)
  # Action with the higher success rate (A wins ties).
  best = a >= b ? 0 : 1
  # Current probability of picking action A.
  p_a = 0.5
  # How many times the optimal action has been picked so far.
  hits = 0

  try.times.map do |t|
    picked = choose(p_a)
    hits += 1 if picked == best
    ratio = hits.to_f / (t + 1).to_f

    # Outcome of the picked action: 0 = success, 1 = failure.
    outcome = picked == 0 ? result(a) : result(b)

    if outcome == 0
      if picked == 0
        # A was rewarded: move P(A) toward 1.
        p_a += alpha * (1 - p_a)
      else
        # B was rewarded: move P(B) toward 1, i.e. shrink P(A).
        p_b = 1 - p_a
        p_a -= alpha * (1 - p_b)
      end
    end

    ratio
  end
end
# Runs the comparison experiment: averages the optimal-choice ratio history of
# the RP and RI automata over 2000 runs of 500 trials each (success rates 0.8
# for A and 0.9 for B, learning rate 0.1) and plots both averaged curves.
def exp
  trials = 500
  runs = 2000

  # Element-wise running sums of the per-run histories.
  rp_total = NVector.float(trials)
  ri_total = NVector.float(trials)

  runs.times do |run|
    rp_total += NVector.to_na(automaton_RP(trials, run, 0.8, 0.9, 0.1))
    ri_total += NVector.to_na(automaton_RI(trials, run, 0.8, 0.9, 0.1))
  end

  # Turn the sums into per-trial averages, keyed by chart title.
  series = {
    "Learning Automaton (RP)" => (rp_total / runs.to_f).to_a,
    "Learning Automaton (RI)" => (ri_total / runs.to_f).to_a
  }

  draw_chart(NArray[0..trials-1].to_a, series)
end
# Script entry point: run the experiment defined above and plot its results.
exp
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment