Zachary Sunberg (zsunberg)
from julia.CommonRLSpaces import Box
from julia.Main import Float64
from julia.POMDPs import solve, pdf, action
from julia.QMDP import QMDPSolver
from julia.POMCPOW import POMCPOWSolver
from julia.POMDPTools import stepthrough, alphavectors, Uniform, Deterministic
from julia.Distributions import Normal, AbstractMvNormal, MvNormal
from quickpomdps import QuickPOMDP
@zsunberg
zsunberg / lunar_lander.jl
Created July 7, 2023 13:49
Lunar Lander model for POMDPs.jl (happy for someone to make a package for this!)
struct LunarLander <: POMDP{Vector{Float64}, Vector{Float64}, Vector{Float64}}
    dt::Float64
    m::Float64
    I::Float64
    Q::Vector{Float64}
    R::Vector{Float64}
end

function LunarLander(;dt::Float64=0.1, m::Float64=1.0, I::Float64=10.0)
    Q = [0.0, 0.0, 0.0, 0.1, 0.1, 0.01]
@zsunberg
zsunberg / multithread_comparison.jl
Created December 24, 2021 01:50
Is it better to launch 10000 tasks or nthreads() tasks in Julia?
using BenchmarkTools
function operate!(shared, locks)
    i = rand(1:length(shared))
    lock(locks[i]) do
        shared[i] += 1
    end
end

function operate_many!(shared, locks, channel)
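The question this gist benchmarks can be sketched in Python with threads: one thread per operation versus a fixed pool of workers draining a shared work count. The function names, the pool size, and the work-counter scheme below are my own illustrative choices, not the gist's code, which uses Julia tasks and a Channel.

```python
import random
import threading

def operate(shared, locks):
    """Pick a random slot and increment it under that slot's lock."""
    i = random.randrange(len(shared))
    with locks[i]:
        shared[i] += 1

def run_many_tasks(n_ops, n_slots):
    """Launch one thread per operation (analogue of spawning 10000 tasks)."""
    shared = [0] * n_slots
    locks = [threading.Lock() for _ in range(n_slots)]
    threads = [threading.Thread(target=operate, args=(shared, locks))
               for _ in range(n_ops)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    return shared

def run_worker_pool(n_ops, n_slots, n_workers=4):
    """Launch a fixed pool of workers that drain a shared work counter
    (analogue of nthreads() tasks pulling work from a Channel)."""
    shared = [0] * n_slots
    locks = [threading.Lock() for _ in range(n_slots)]
    remaining = n_ops
    count_lock = threading.Lock()

    def worker():
        nonlocal remaining
        while True:
            with count_lock:
                if remaining == 0:
                    return
                remaining -= 1
            operate(shared, locks)

    workers = [threading.Thread(target=worker) for _ in range(n_workers)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    return shared
```

Both variants perform exactly n_ops increments in total; the difference the benchmark probes is scheduling overhead, not correctness.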
@zsunberg
zsunberg / jrl_error.jl
Created October 18, 2020 04:51
JuliaReinforcementLearning script that produces a bounds error
using ReinforcementLearningZoo
using ReinforcementLearningBase
using ReinforcementLearningCore: NeuralNetworkApproximator, EpsilonGreedyExplorer, QBasedPolicy, CircularCompactSARTSATrajectory
using ReinforcementLearning
using Flux
using Flux: glorot_uniform, huber_loss
import Random
import BSON
RL = ReinforcementLearningBase
@zsunberg
zsunberg / model.mof.json
Created August 27, 2020 21:00
A MOI Model that GLPK fails on
{
  "name": "MathOptFormat Model",
  "version": {
    "major": 0,
    "minor": 4
  },
  "variables": [
    {
      "name": "x[1,1]"
    },
@zsunberg
zsunberg / CommonRL.md
Last active June 14, 2020 08:56
A common RL environment interface

The only code in the entire package initially is

abstract type CommonEnv end

function reset! end
function step! end
function actions end

(of course there will be extensive documentation, etc.)
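The three-function interface above could be mirrored in Python roughly as follows. The class names, the CountdownEnv toy environment, and the (observation, reward, done) return shape of step are my own illustrative assumptions, not part of the CommonRL proposal:

```python
from abc import ABC, abstractmethod

class CommonEnv(ABC):
    """Python analogue of the minimal reset!/step!/actions interface."""

    @abstractmethod
    def reset(self):
        """Reset the environment and return the initial observation."""

    @abstractmethod
    def step(self, action):
        """Apply an action; return (observation, reward, done)."""

    @abstractmethod
    def actions(self):
        """Return the collection of available actions."""

class CountdownEnv(CommonEnv):
    """Toy environment: start at n, each action subtracts 1 or 2,
    and the episode ends when the state reaches zero or below."""

    def __init__(self, n=5):
        self.n = n
        self.state = n

    def reset(self):
        self.state = self.n
        return self.state

    def step(self, action):
        self.state -= action
        done = self.state <= 0
        return self.state, -1.0, done

    def actions(self):
        return [1, 2]
```

The point of the sketch is the same as the proposal's: the interface itself is tiny, and everything else lives in documentation and optional extensions.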

Here are the two ways of augmenting the state space that I was referring to (these are illustrative rather than efficient or complete implementations):

  1. Add a single new terminal state

struct VariableDiscountWrapper1{S, A, F<:Function} <: MDP{Union{S, TerminalState}, A}
    m::MDP{S, A}
    discount::F
end
@zsunberg
zsunberg / ekf_usage.jl
Last active June 19, 2019 18:01
Sketch of Extended Kalman Filter package usage
using ExtendedKalmanFilters
using Distributions
using DelimitedFiles
# We may also want to look at DynamicalSystems.jl
# The package should accept AbstractArrays wherever possible so people can use StaticArrays
# Model semantics
# x_{t+1} = f(x_t, u_t) + w_t
# y_t = h(x_t) + v_t # should the control be an argument of h?
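Under the model semantics in the comments above, one scalar predict/update cycle of an extended Kalman filter might look like the following Python sketch. The function ekf_step and all of its argument names are hypothetical; the gist only sketches the intended package API, and this is a 1-D illustration of the math, not the package:

```python
def ekf_step(mu, P, u, y, f, h, df_dx, dh_dx, Q, R):
    """One scalar EKF predict/update cycle for the model
    x_{t+1} = f(x_t, u_t) + w_t,  y_t = h(x_t) + v_t,
    with w ~ N(0, Q) and v ~ N(0, R)."""
    # Predict: propagate the mean through f and the variance
    # through the Jacobian of f at the current estimate.
    mu_pred = f(mu, u)
    F = df_dx(mu, u)
    P_pred = F * P * F + Q
    # Update: linearize h at the predicted mean.
    H = dh_dx(mu_pred)
    S = H * P_pred * H + R      # innovation variance
    K = P_pred * H / S          # Kalman gain
    mu_new = mu_pred + K * (y - h(mu_pred))
    P_new = (1.0 - K * H) * P_pred
    return mu_new, P_new
```

In the linear case (f(x, u) = x + u, h(x) = x, Jacobians equal to 1) this reduces exactly to the ordinary Kalman filter, which makes it easy to sanity-check.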
@zsunberg
zsunberg / transmat.jl
Created October 29, 2018 18:14
Procedure to generate a transition matrix from an MDP
using POMDPs
using POMDPModelTools
function transition_matrix_a_s_sp(mdp::MDP)
    na = n_actions(mdp)
    ns = n_states(mdp)
    mat = zeros(na, ns, ns) # this should be sparse
    for a in actions(mdp)
        ai = actionindex(mdp, a)
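The same procedure can be sketched in plain Python for a generic tabular model. Here transition(s, a) is a hypothetical callback returning a dict of successor-state probabilities; this mirrors the idea of the POMDPs.jl version above but is not its API:

```python
def transition_matrix_a_s_sp(n_states, n_actions, transition):
    """Build a dense T[a][s][sp] array of transition probabilities,
    where transition(s, a) returns a dict {sp: probability}."""
    T = [[[0.0] * n_states for _ in range(n_states)]
         for _ in range(n_actions)]
    for a in range(n_actions):
        for s in range(n_states):
            for sp, p in transition(s, a).items():
                T[a][s][sp] = p
    return T

# Usage: a two-state chain where every action moves to either state
# with equal probability.
coin_flip = lambda s, a: {s: 0.5, 1 - s: 0.5}
T = transition_matrix_a_s_sp(2, 1, coin_flip)
```

Each row T[a][s] should sum to one for a well-formed model, which is a cheap check to run after building the matrix.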
@zsunberg
zsunberg / gw_bench.jl
Created August 31, 2018 23:58
Grid world benchmark showing that the current Julia compiler cannot handle multiple state types. Output for Julia 1.0 at bottom.
using POMDPs
using POMDPModelTools
using POMDPSimulators
using POMDPPolicies
using StaticArrays
using Parameters
using Random
using BenchmarkTools
using POMDPModels
using Test