Zachary Sunberg zsunberg

## pomdps_alloc_test.jl
using POMDPs
import POMDPs: create_state, discount, reward
using POMDPToolbox

# MDP used by this allocation test: states are `Matrix{Int}` and actions are `Int`
# (the two type parameters passed to `MDP`).
# NOTE(review): `type` is the legacy keyword for a mutable composite type; Julia 0.6
# renamed it to `mutable struct`. Confirm the target Julia version before changing it.
type ImageMDP <: MDP{Matrix{Int},Int}
    size::Tuple{Int, Int}  # two Ints; `create_state` uses them as the state matrix dimensions
end

# Build an Int matrix whose two dimensions are taken from `mdp.size`.
function create_state(mdp::ImageMDP)
    nrows, ncols = mdp.size
    return Array(Int, nrows, ncols)
end

# Every ImageMDP uses the same constant discount factor.
function discount(::ImageMDP)
    return 0.9
end

## new_autoviz.md

      
              1 file
            
          
              0 forks
            
          
              2 comments
            
          
              0 stars
            
          
                zsunberg
                / new_autoviz.md
            
            
              Last active
              February 26, 2018 19:36
            
          
    AutoViz.jl

A package for rendering simple scenes primarily consisting of cars on roadways using Cairo.
Usage

The main function is
render(scene)

  
## simple_grid_world.jl
# Static vector of two Ints; used as the key type of `rewards` and the element type
# of `terminate_in` below.
const Vec2 = SVector{2,Int}
# State type of the MDP: either a `Vec2` or a `TerminalState`.
const StateTypes = Union{Vec2, TerminalState}

# Grid-world MDP with `StateTypes` states and `Symbol` actions.
# `@with_kw` (presumably from Parameters.jl — confirm the import) turns the
# per-field defaults below into keyword-constructor defaults.
@with_kw struct SimpleGridWorld <: MDP{StateTypes, Symbol}
    # presumably the grid dimensions — confirm against the code that reads `size`
    size::Tuple{Int, Int}           = (10,10)
    # reward value keyed by Vec2
    rewards::Dict{Vec2, Float64}    = Dict(Vec2(4,3)=>-10.0, Vec2(4,6)=>-5.0, Vec2(9,3)=>10.0, Vec2(8,8)=>3.0)
    # set of Vec2 values; the default holds the same four entries as the default `rewards` keys
    terminate_in::Set{Vec2}         = Set((Vec2(4,3), Vec2(4,6), Vec2(9,3), Vec2(8,8)))
    # NOTE(review): looks like a transition probability — exact meaning is not shown here
    tprob::Float64                  = 0.7
    # discount factor stored on the problem
    discount::Float64               = 0.95
end

## gw_bench.jl
using POMDPs
using POMDPModelTools
using POMDPSimulators
using POMDPPolicies
using StaticArrays
using Parameters
using Random
using BenchmarkTools
using POMDPModels
using Test

## transmat.jl
using POMDPs
using POMDPModelTools

function transition_matrix_a_s_sp(mdp::MDP)
    na = n_actions(mdp)
    ns = n_states(mdp)
    mat = zeros(na, ns, ns) # this should be sparse

    for a in actions(mdp)
        ai = actionindex(mdp, a)

## ekf_usage.jl
using ExtendedKalmanFilters
using Distributions
using DelimitedFiles

# We may also want to look at DynamicalSystems.jl
# The package should accept AbstractArrays wherever possible so people can use StaticArrays

# Model semantics
# x_{t+1} = f(x_t, u_t) + w_t
# y_t = h(x_t) + v_t # should the control be an argument of h?

## variable_discount_state_space_augmentation.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                zsunberg
                / variable_discount_state_space_augmentation.md
            
            
              Last active
              December 6, 2019 05:26
            
          
    Here are the two ways of augmenting the state space that I was referring to (these are illustrative rather than efficient or complete implementations):

Add a single new terminal state

# Wraps an MDP `m` (state type S, action type A) as an MDP whose state type is
# Union{S, TerminalState}, i.e. the wrapped state type plus `TerminalState`.
# NOTE(review): `m::MDP{S, A}` is an abstractly typed field; the accompanying text
# describes these implementations as illustrative rather than efficient.
struct VariableDiscountWrapper1{S, A, F<:Function} <: MDP{Union{S, TerminalState}, A}
    m::MDP{S, A}  # the wrapped problem
    discount::F   # function-valued discount; its call signature is not shown here — TODO confirm
end

  
## CommonRL.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                zsunberg
                / CommonRL.md
            
            
              Last active
              June 14, 2020 08:56
            
              
                A common RL environment interface
              
          
    Initially, the only code in the entire package is:
# Abstract type with no fields; presumably the common supertype for concrete environments.
abstract type CommonEnv end

# Generic functions declared without any methods. Argument and return conventions
# are not shown here — methods are presumably added per environment type.
function reset! end
function step! end
function actions end
(of course there will be extensive documentation, etc.)

  
## model.mof.json
{
  "name": "MathOptFormat Model",
  "version": {
    "major": 0,
    "minor": 4
  },
  "variables": [
    {
      "name": "x[1,1]"
    },

## jrl_error.jl
using ReinforcementLearningZoo
using ReinforcementLearningBase
using ReinforcementLearningCore: NeuralNetworkApproximator, EpsilonGreedyExplorer, QBasedPolicy, CircularCompactSARTSATrajectory
using ReinforcementLearning
using Flux
using Flux: glorot_uniform, huber_loss
import Random
import BSON

# Short alias for the ReinforcementLearningBase module.
# NOTE(review): this is a non-const global binding; `const RL = ...` is the usual
# form for a module alias — confirm nothing reassigns `RL` before changing it.
RL = ReinforcementLearningBase
	using POMDPs
	import POMDPs: create_state, discount, reward
	using POMDPToolbox

	# MDP whose states are `Matrix{Int}` and whose actions are `Int`
	# (the two type parameters passed to `MDP`).
	# NOTE(review): `type` is the legacy keyword for a mutable composite type; Julia 0.6
	# renamed it to `mutable struct`. Confirm the target Julia version before changing it.
	type ImageMDP <: MDP{Matrix{Int},Int}
	# two Ints; `create_state` uses them as the state matrix dimensions
	size::Tuple{Int, Int}
	end

	# Build an Int matrix whose two dimensions are taken from `mdp.size`.
	function create_state(mdp::ImageMDP)
	    nrows, ncols = mdp.size
	    return Array(Int, nrows, ncols)
	end

	# Every ImageMDP uses the same constant discount factor.
	function discount(::ImageMDP)
	    return 0.9
	end
	# Static vector of two Ints; used as the key type of `rewards` and the element type
	# of `terminate_in` below.
	const Vec2 = SVector{2,Int}
	# State type of the MDP: either a `Vec2` or a `TerminalState`.
	const StateTypes = Union{Vec2, TerminalState}

	# Grid-world MDP with `StateTypes` states and `Symbol` actions.
	# `@with_kw` (presumably from Parameters.jl — confirm the import) turns the
	# per-field defaults below into keyword-constructor defaults.
	@with_kw struct SimpleGridWorld <: MDP{StateTypes, Symbol}
	# presumably the grid dimensions — confirm against the code that reads `size`
	size::Tuple{Int, Int} = (10,10)
	# reward value keyed by Vec2
	rewards::Dict{Vec2, Float64} = Dict(Vec2(4,3)=>-10.0, Vec2(4,6)=>-5.0, Vec2(9,3)=>10.0, Vec2(8,8)=>3.0)
	# set of Vec2 values; the default holds the same four entries as the default `rewards` keys
	terminate_in::Set{Vec2} = Set((Vec2(4,3), Vec2(4,6), Vec2(9,3), Vec2(8,8)))
	# NOTE(review): looks like a transition probability — exact meaning is not shown here
	tprob::Float64 = 0.7
	# discount factor stored on the problem
	discount::Float64 = 0.95
	end
	using POMDPs
	using POMDPModelTools
	using POMDPSimulators
	using POMDPPolicies
	using StaticArrays
	using Parameters
	using Random
	using BenchmarkTools
	using POMDPModels
	using Test
	using POMDPs
	using POMDPModelTools

	function transition_matrix_a_s_sp(mdp::MDP)
	na = n_actions(mdp)
	ns = n_states(mdp)
	mat = zeros(na, ns, ns) # this should be sparse

	for a in actions(mdp)
	ai = actionindex(mdp, a)
	using ExtendedKalmanFilters
	using Distributions
	using DelimitedFiles

	# We may also want to look at DynamicalSystems.jl
	# The package should accept AbstractArrays wherever possible so people can use StaticArrays

	# Model semantics
	# x_{t+1} = f(x_t, u_t) + w_t
	# y_t = h(x_t) + v_t # should the control be an argument of h?
	{
	"name": "MathOptFormat Model",
	"version": {
	"major": 0,
	"minor": 4
	},
	"variables": [
	{
	"name": "x[1,1]"
	},
	using ReinforcementLearningZoo
	using ReinforcementLearningBase
	using ReinforcementLearningCore: NeuralNetworkApproximator, EpsilonGreedyExplorer, QBasedPolicy, CircularCompactSARTSATrajectory
	using ReinforcementLearning
	using Flux
	using Flux: glorot_uniform, huber_loss
	import Random
	import BSON

	# Short alias for the ReinforcementLearningBase module.
	# NOTE(review): this is a non-const global binding; `const RL = ...` is the usual
	# form for a module alias — confirm nothing reassigns `RL` before changing it.
	RL = ReinforcementLearningBase