Last active
December 2, 2016 07:41
-
-
Save zsunberg/38142fc6ce46a463788ae64cae76ca31 to your computer and use it in GitHub Desktop.
A script to test the effects of preallocating memory in POMDPs.jl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using POMDPs | |
import POMDPs: create_state, discount, reward | |
using POMDPToolbox | |
"""
    ImageMDP(size)

MDP whose states are `Matrix{Int}` values and whose actions are `Int`s.

`size` holds the two dimensions used when a state matrix is created for this
problem.
"""
type ImageMDP <: MDP{Matrix{Int},Int}
    size::NTuple{2,Int} # (first dimension, second dimension) of a state matrix
end
"""
    create_state(mdp::ImageMDP)

Allocate an uninitialized `Matrix{Int}` with the dimensions stored in `mdp.size`.
The contents are arbitrary until overwritten by the caller.
"""
function create_state(mdp::ImageMDP)
    nrows, ncols = mdp.size
    return Array(Int, nrows, ncols)
end
"""
    discount(mdp::ImageMDP)

Return the discount factor for an `ImageMDP`, which is the constant `0.9`.
"""
function discount(mdp::ImageMDP)
    return 0.9
end
"""
    generate_sr(mdp, s, a, rng, sp=create_state(mdp))

Generate a successor state and reward for taking action `a` in state `s`.

`s` is copied into `sp` (a freshly created state unless the caller supplies a
buffer), the reward is read from entry `[a, a]` of that copy, and then `a` is
added to one entry of `sp` whose indices are drawn from `rng`.

Returns the tuple `(sp, r)`. `s` is left unmodified as long as `sp` is a
different array.
"""
function generate_sr(mdp::ImageMDP,
                     s::Matrix{Int},
                     a::Int,
                     rng::AbstractRNG,
                     sp::Matrix{Int}=create_state(mdp))
    copy!(sp, s)
    # The reward is taken before the random entry is perturbed.
    reward_value = sp[a, a]
    nrows, ncols = mdp.size
    # Draw the row index first, then the column index, to keep the RNG stream order.
    row = rand(rng, 1:nrows)
    col = rand(rng, 1:ncols)
    sp[row, col] += a
    return sp, reward_value
end
"""
    step!(s, mdp, a, rng)

Advance the state in place: add `a` to one entry of `s` whose indices are drawn
from `rng` within the bounds given by `mdp.size`. Returns the mutated `s`.
"""
function step!(s::Matrix{Int}, mdp::ImageMDP, a::Int, rng::AbstractRNG)
    nrows, ncols = mdp.size
    # Row index is drawn before the column index to keep the RNG stream order.
    row = rand(rng, 1:nrows)
    col = rand(rng, 1:ncols)
    s[row, col] += a
    return s
end
"""
    reward(mdp::ImageMDP, s, a)

Return the reward for taking action `a` in state `s`: the diagonal entry `s[a, a]`.
"""
reward(mdp::ImageMDP, s::Matrix{Int}, a::Int) = s[a, a]
"""
    IPolicy(addend)

Policy over `Matrix{Int}` states that stores a single integer, `addend`.
"""
type IPolicy <: Policy{Matrix{Int}}
    addend::Int # the value handed back by `action` for this policy
end
"""
    action(p::IPolicy, s, a=1)

Return the action chosen by `p`, which is always `p.addend`. Neither the state
`s` nor the optional third argument `a` influences the result.
"""
function action(p::IPolicy, s::Matrix{Int}, a::Int=1)
    return p.addend
end
"""
    simulate_no_alloc(sim, mdp, policy, initial_state)

Roll out `policy` on `mdp` starting from `initial_state` and return the
accumulated discounted reward.

No successor buffer is handed to `generate_sr`, so it falls back to whatever
default it uses for the next state on every step. The loop stops when the
running discount is no longer above `sim.eps` (default `0.0`), when
`isterminal(mdp, s)` holds, or after `sim.max_steps` steps (default
`typemax(Int)`).
"""
function simulate_no_alloc{S,A}(sim::RolloutSimulator,
                                mdp::MDP{S,A},
                                policy::Policy,
                                initial_state::S)
    tolerance = get(sim.eps, 0.0)
    step_limit = get(sim.max_steps, typemax(Int))

    s = initial_state
    total_reward = 0.0
    running_discount = 1.0
    t = 1

    while running_discount > tolerance && !isterminal(mdp, s) && t <= step_limit
        chosen = action(policy, s)
        next_state, r = generate_sr(mdp, s, chosen, sim.rng)
        total_reward += running_discount * r
        running_discount *= discount(mdp)
        s = next_state
        t += 1
    end

    return total_reward
end
"""
    simulate_alloc(sim, mdp, policy, initial_state)

Roll out `policy` on `mdp` starting from a deep copy of `initial_state` and
return the accumulated discounted reward.

Two state buffers are set up before the loop (the copy of the initial state and
one from `create_state`) and are swapped after every step, so the buffer passed
to `generate_sr` is always the one holding the older state. The loop stops when
the running discount is no longer above `sim.eps` (default `0.0`), when
`isterminal(mdp, s)` holds, or after `sim.max_steps` steps (default
`typemax(Int)`).
"""
function simulate_alloc{S,A}(sim::RolloutSimulator,
                             mdp::MDP{S,A},
                             policy::Policy,
                             initial_state::S)
    tolerance = get(sim.eps, 0.0)
    step_limit = get(sim.max_steps, typemax(Int))

    # Working buffers: `s` is the current state, `sp` receives the successor.
    s = deepcopy(initial_state)
    sp = create_state(mdp)
    a = create_action(mdp)

    total_reward = 0.0
    running_discount = 1.0
    t = 1

    while running_discount > tolerance && !isterminal(mdp, s) && t <= step_limit
        a = action(policy, s, a)
        sp, r = generate_sr(mdp, s, a, sim.rng, sp)
        total_reward += running_discount * r
        # Swap roles so the old current-state buffer is reused for the next successor.
        s, sp = sp, s
        running_discount *= discount(mdp)
        t += 1
    end

    return total_reward
end
"""
    simulate_step(sim, mdp, policy, initial_state)

Roll out `policy` on `mdp` by mutating a single state in place and return the
accumulated discounted reward.

A deep copy of `initial_state` is made once; each iteration reads the reward
with `reward(mdp, s, a)` and then advances the same array with `step!`. The loop
stops when the running discount is no longer above `sim.eps` (default `0.0`),
when `isterminal(mdp, s)` holds, or after `sim.max_steps` steps (default
`typemax(Int)`).

Random numbers are drawn from `sim.rng`, the same generator the other rollout
variants use, rather than from a global variable named `rng`.
"""
function simulate_step{S,A}(sim::RolloutSimulator,
                            mdp::MDP{S,A},
                            policy::Policy,
                            initial_state::S)
    eps = get(sim.eps, 0.0)
    max_steps = get(sim.max_steps, typemax(Int))

    # Only one state buffer is needed because `step!` updates it in place.
    s = deepcopy(initial_state)
    a = create_action(mdp)

    disc = 1.0
    r_total = 0.0
    step = 1

    while disc > eps && !isterminal(mdp, s) && step <= max_steps
        a = action(policy, s, a)
        # The reward must be read before `step!` modifies `s`.
        r = reward(mdp, s, a)
        step!(s, mdp, a, sim.rng)
        r_total += disc * r
        disc *= discount(mdp)
        step += 1
    end

    return r_total
end
# Problem and policy shared by every run below.
mdp = ImageMDP((500, 500))
policy = IPolicy(147)

# One run of each variant from an identically seeded generator. These calls also
# execute every simulate function once before the timed loops start.
rng = MersenneTwister(123)
is = rand(rng, Int, mdp.size...)
@show simulate_alloc(RolloutSimulator(max_steps=500, rng=rng), mdp, policy, is)

rng = MersenneTwister(123)
is = rand(rng, Int, mdp.size...)
@show simulate_no_alloc(RolloutSimulator(max_steps=500, rng=rng), mdp, policy, is)

rng = MersenneTwister(123)
is = rand(rng, Int, mdp.size...)
@show simulate_step(RolloutSimulator(max_steps=500, rng=rng), mdp, policy, is)

# Timed runs: 100 rollouts per variant, seeded 1 through 100. `rng` and `is` are
# deliberately the same names as above.
println("Step")
rtot = 0.0
@time for seed in 1:100
    rng = MersenneTwister(seed)
    is = rand(rng, Int, mdp.size...)
    rtot += simulate_step(RolloutSimulator(max_steps=500, rng=rng), mdp, policy, is)
end
@show rtot

println("Pre Allocation:")
rtot = 0.0
@time for seed in 1:100
    rng = MersenneTwister(seed)
    is = rand(rng, Int, mdp.size...)
    rtot += simulate_alloc(RolloutSimulator(max_steps=500, rng=rng), mdp, policy, is)
end
@show rtot

println("No Allocation")
rtot = 0.0
@time for seed in 1:100
    rng = MersenneTwister(seed)
    is = rand(rng, Int, mdp.size...)
    rtot += simulate_no_alloc(RolloutSimulator(max_steps=500, rng=rng), mdp, policy, is)
end
@show rtot
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment