zsunberg/variable_discount_state_space_augmentation.md

## variable_discount_state_space_augmentation.md

      
    Raw
  

              variable_discount_state_space_augmentation.md
            
          
    Here are the two ways of augmenting the state space that I was referring to (these are illustrative rather than efficient or complete implementations):

Add a single new terminal state

# MDP wrapper that represents a state- and action-dependent discount by adding a
# single terminal state to the wrapped problem's state space; this is why the
# state type is `Union{S, TerminalState}`.
struct VariableDiscountWrapper1{S, A, F<:Function} <: MDP{Union{S, TerminalState}, A}
    m::MDP{S, A}    # the wrapped MDP
    discount::F     # called as `discount(s, a)`; presumably returns a value in [0, 1] - confirm
end

# Transition distribution of the augmented problem: a two-component mixture.
# With weight `discount(s, a)` the next state is drawn from the wrapped MDP's
# transition distribution; with the remaining weight the process moves to
# `terminalstate`.
function transition(m::VariableDiscountWrapper1, s, a)
    inner_dist = transition(m.m, s, a)
    keep_going = m.discount(s, a)
    continuing = keep_going => inner_dist
    terminating = (1 - keep_going) => Deterministic(terminalstate)
    return CombinedDistribution([continuing, terminating])
end

# All states of the wrapped MDP followed by the added terminal state.
# The states are collected into a vector with element type
# `Union{S, TerminalState}`: pushing `terminalstate` onto a plain `Vector{S}`
# would fail because it cannot be converted to `S`.
function states(m::VariableDiscountWrapper1{S}) where {S}
    augmented = collect(Union{S, TerminalState}, states(m.m))
    return push!(augmented, terminalstate)
end

# forward other methods to m - we can make something that does this
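(`CombinedDistribution` hasn't been implemented yet; it is meant to be a mixture that draws from each listed distribution with the probability paired with it. If it doesn't make sense, let me know.)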

Keep track of the discount factor

# MDP wrapper whose state is a tuple `(s, d)`: a state `s` of the wrapped
# problem paired with a `Float64` `d` (the state type is `Tuple{S, Float64}`).
struct VariableDiscountWrapper2{S, A, F<:Function} <: MDP{Tuple{S, Float64}, A}
    m::MDP{S, A}    # the wrapped MDP
    discount::F     # discount function; expected to be called as `discount(s, a)`
end

# Draw an initial augmented state: an initial state of the wrapped MDP paired
# with a discount of 1.0 (nothing has been discounted yet).
# `rng` is forwarded so the wrapped problem samples with the caller's generator
# instead of the argument being silently dropped.
initialstate(m::VariableDiscountWrapper2, rng) = (initialstate(m.m, rng), 1.0)

# Sample the next augmented state from the augmented state `sd = (s, d)`.
# The next underlying state `sp` is generated by the wrapped MDP; the second
# component is the running discount `d` multiplied by this step's discount.
function gen(::DDNNode{:sp}, m::VariableDiscountWrapper2, sd, a, rng)
    s, d = sd
    sp = gen(DDNNode(:sp), m.m, s, a, rng)
    # Call the wrapper's own discount function: a bare `discount(s, a)` does not
    # refer to the `discount` field of `m`.
    return (sp, d * m.discount(s, a))
end

# Reward of the wrapped MDP for the underlying transition, scaled by the
# discount value carried in the current augmented state `sd`.
function reward(m::VariableDiscountWrapper2, sd, a, sdp)
    state, accumulated = sd
    next_state = sdp[1]    # only the underlying next state is needed
    base_reward = reward(m.m, state, a, next_state)
    return accumulated * base_reward
end

# forward other methods to m