# WilliamJou/Faucet

## Faucet
importall POMDPs
using POMDPToolbox
using SARSOP
using BasicPOMCP
using D3Trees
using ParticleFilters

# Task identifiers; `states` enumerates one FState per task value.
const DISH = 1
const HAND = 2

# Temperature levels 0, 10, 20, 30; returned by both `actions` and `observations`.
const TEMPS = 0:10:30

# Position of each temperature within TEMPS (1-based), used by the index functions.
const TINDEX = Dict{Int, Int}(zip(TEMPS, 1:length(TEMPS)))


# One state of the faucet model.
struct FState
    task::Int       # task identifier; `states` enumerates DISH and HAND
    time::Int       # time step; `transition` increments it by one
    prev_temp::Int  # action taken on the previous step (set by `transition`)
end

# POMDP model with `FState` states and `Int` actions and observations.
struct FPOMDP <: POMDP{FState, Int, Int}
    p_change::Float64  # NOTE(review): not read by any function visible in this file
    max_time::Int      # last non-terminal time step (see `isterminal`)
end

# Problem instance used throughout the script: p_change = 0.5, max_time = 10.
p = FPOMDP(0.5, 10)

# Temperature that `reward` pays +10.0 for, keyed by task.
const DTEMP = Dict{Int, Int}(
    DISH => 30,
    HAND => 20
)

"""
    isterminal(p::FPOMDP, s::FState)

Return `true` once the state's time step has passed `p.max_time`.
"""
function isterminal(p::FPOMDP, s::FState)
    return p.max_time < s.time
end

"""
    states(p::FPOMDP)

Return all states as a flat vector: every combination of task (DISH, HAND), time step in
`0:p.max_time + 1`, and previous temperature in `TEMPS`.

The time range runs one step past `p.max_time` because `transition` advances `time` by one
from every state, including those at `p.max_time`. Without that final layer (the states for
which `isterminal` is true) the successor of a state at `p.max_time` would be missing from
`SINDEX` and `state_index` would throw a `KeyError`.
"""
function states(p::FPOMDP)
    times = 0:(p.max_time + 1)
    grid = [FState(task, t, pt) for task in [DISH, HAND], t in times, pt in TEMPS]
    return vec(grid)
end

# Size of the state space: 2 tasks * (max_time + 2) time steps * length(TEMPS) temperatures.
n_states(p::FPOMDP) = 2 * (p.max_time + 2) * length(TEMPS)

# Position of each state in `states(p)`, built once from the global instance `p`.
const SINDEX = Dict{FState, Int}(s => i for (i, s) in enumerate(states(p)))

# Index of `s` in the state vector. The lookup table comes from the global `p`, so the
# argument `p` is not consulted here.
state_index(p::FPOMDP, s::FState) = SINDEX[s]

# Action space: the temperature levels in TEMPS.
actions(p::FPOMDP) = TEMPS

# Number of actions, taken from the action space itself.
n_actions(p::FPOMDP) = length(actions(p))

# 1-based position of action `a` in TEMPS; raises KeyError for values outside TEMPS.
function action_index(p::FPOMDP, a::Int)
    return TINDEX[a]
end

# Observation space: the temperature levels in TEMPS.
observations(p::FPOMDP) = TEMPS

# Number of observations, taken from the observation space itself.
n_observations(p::FPOMDP) = length(observations(p))

# 1-based position of observation `o` in TEMPS; raises KeyError for values outside TEMPS.
function obs_index(p::FPOMDP, o::Int)
    return TINDEX[o]
end

"""
    transition(p::FPOMDP, s::FState, a::Int)

Return the next-state distribution: a single outcome with probability 1.0 in which the
task is unchanged, `time` advances by one, and `prev_temp` becomes the action `a`.
"""
function transition(p::FPOMDP, s::FState, a::Int)
    next_state = FState(s.task, s.time + 1, a)
    return SparseCat([next_state], [1.0])
end

"""
    observation(p::FPOMDP, a::Int, sp::FState)

Return the observation distribution for arriving in `sp` after action `a`.

Observation 0 is certain while `sp.time <= 2` or when `a` equals the task's entry in
`DTEMP`. Otherwise that `DTEMP` entry and 0 are each observed with probability 0.5.
"""
function observation(p::FPOMDP, a::Int, sp::FState)
    # The lookup stays behind the time check so it only runs when the time test fails.
    if sp.time <= 2 || a == DTEMP[sp.task]
        return SparseCat([0], [1.0])
    end
    # NOTE(review): the 0.5 weights are literals; `p.p_change` is not consulted here.
    # Confirm whether it was meant to drive these probabilities.
    return SparseCat([DTEMP[sp.task], 0], [0.5, 0.5])
end

"""
    reward(p::FPOMDP, s::FState, a::Int)

Return 10.0 when action `a` equals the `DTEMP` entry for `s.task`, and -10.0 otherwise.
"""
function reward(p::FPOMDP, s::FState, a::Int)
    return a == DTEMP[s.task] ? 10.0 : -10.0
end

"""
    initial_state_distribution(p::FPOMDP)

Return the initial distribution: tasks DISH and HAND with probability 0.5 each, both
starting at time 0 with previous temperature 0.
"""
function initial_state_distribution(p::FPOMDP)
    starts = [FState(task, 0, 0) for task in [DISH, HAND]]
    return SparseCat(starts, [0.5, 0.5])
end


# Alternatives kept for experimentation:
# policy = RandomPolicy(p)
# solver = POMCPSolver(c=100)
solver = SARSOPSolver()
policy = solve(solver, p)

# Step through a simulation of the policy, printing a belief summary followed by the
# state, action, reward, and observation of every step.
for (b, s, a, r, o) in stepthrough(p, policy, "bsaro")
    # Fraction of belief particles whose task is HAND.
    # NOTE(review): `particles`/`n_particles` presumably need a particle-based belief;
    # confirm the belief produced for a SARSOP policy supports them.
    hand_count = count(q -> q.task == HAND, particles(b))
    frac_hand = hand_count / n_particles(b)
    @show frac_hand s a r o
end

# Build a D3Tree from the policy and open it in Chrome.
# NOTE(review): confirm D3Tree accepts the policy type returned by the SARSOP solver.
inchrome(D3Tree(policy))
	importall POMDPs
	using POMDPToolbox
	using SARSOP
	using BasicPOMCP
	using D3Trees
	using ParticleFilters

	# NOTE(review): these constants repeat, with the same values, the definitions near
	# the top of this file.
	# Task identifiers; `states` enumerates one FState per task value.
	const DISH = 1
	const HAND = 2

	# Temperature levels 0, 10, 20, 30; returned by both `actions` and `observations`.
	const TEMPS = 0:10:30

	# Position of each temperature within TEMPS (1-based), used by the index functions.
	const TINDEX = Dict{Int, Int}(zip(TEMPS, 1:length(TEMPS)))


	# One state of the faucet model.
	struct FState
	    task::Int       # task identifier; `states` enumerates DISH and HAND
	    time::Int       # time step; `transition` increments it by one
	    prev_temp::Int  # action taken on the previous step (set by `transition`)
	end

	# POMDP model with `FState` states and `Int` actions and observations.
	struct FPOMDP <: POMDP{FState, Int, Int}
	    p_change::Float64  # NOTE(review): not read by any function visible in this file
	    max_time::Int      # last non-terminal time step (see `isterminal`)
	end

	# Problem instance used throughout the script: p_change = 0.5, max_time = 10.
	p = FPOMDP(0.5, 10)

	# Temperature that `reward` pays +10.0 for, keyed by task.
	const DTEMP = Dict{Int, Int}(
	    DISH => 30,
	    HAND => 20
	)

	# A state is terminal once its time step has passed `p.max_time`.
	function isterminal(p::FPOMDP, s::FState)
	    return p.max_time < s.time
	end

	# Enumerate all states as a flat vector: every combination of task (DISH, HAND),
	# time step in 0:(p.max_time + 1), and previous temperature in TEMPS.
	#
	# The time range runs one step past `p.max_time` because `transition` advances `time`
	# by one from every state, including those at `p.max_time`. Without that final layer
	# (the states for which `isterminal` is true) the successor of a state at `p.max_time`
	# would be missing from SINDEX and `state_index` would throw a KeyError.
	function states(p::FPOMDP)
	    times = 0:(p.max_time + 1)
	    grid = [FState(task, t, pt) for task in [DISH, HAND], t in times, pt in TEMPS]
	    return vec(grid)
	end

	# Size of the state space: 2 tasks * (max_time + 2) time steps * length(TEMPS)
	# temperatures. The factors are joined with explicit `*`; written side by side, as in
	# `length(TEMPS)(p.max_time+1)2`, they do not form a valid product.
	n_states(p::FPOMDP) = 2 * (p.max_time + 2) * length(TEMPS)

	# Position of each state in `states(p)`, built once from the global instance `p`.
	const SINDEX = Dict{FState, Int}(s => i for (i, s) in enumerate(states(p)))

	# Index of `s` in the state vector. The lookup table comes from the global `p`, so
	# the argument `p` is not consulted here.
	state_index(p::FPOMDP, s::FState) = SINDEX[s]

	# Action space: the temperature levels in TEMPS.
	actions(p::FPOMDP) = TEMPS

	# Number of actions, taken from the action space itself.
	n_actions(p::FPOMDP) = length(actions(p))

	# 1-based position of action `a` in TEMPS; raises KeyError for values outside TEMPS.
	function action_index(p::FPOMDP, a::Int)
	    return TINDEX[a]
	end

	# Observation space: the temperature levels in TEMPS.
	observations(p::FPOMDP) = TEMPS

	# Number of observations, taken from the observation space itself.
	n_observations(p::FPOMDP) = length(observations(p))

	# 1-based position of observation `o` in TEMPS; raises KeyError for values outside TEMPS.
	function obs_index(p::FPOMDP, o::Int)
	    return TINDEX[o]
	end

	# Next-state distribution: a single outcome with probability 1.0 in which the task is
	# unchanged, `time` advances by one, and `prev_temp` becomes the action `a`.
	function transition(p::FPOMDP, s::FState, a::Int)
	    next_state = FState(s.task, s.time + 1, a)
	    return SparseCat([next_state], [1.0])
	end

	# Observation distribution for arriving in `sp` after action `a`.
	# Observation 0 is certain while `sp.time <= 2` or when `a` equals the task's entry
	# in DTEMP; otherwise that DTEMP entry and 0 are each observed with probability 0.5.
	function observation(p::FPOMDP, a::Int, sp::FState)
	    # The lookup stays behind the time check so it only runs when the time test fails.
	    if sp.time <= 2 || a == DTEMP[sp.task]
	        return SparseCat([0], [1.0])
	    end
	    # NOTE(review): the 0.5 weights are literals; `p.p_change` is not consulted here.
	    # Confirm whether it was meant to drive these probabilities.
	    return SparseCat([DTEMP[sp.task], 0], [0.5, 0.5])
	end

	# Reward is 10.0 when action `a` equals the DTEMP entry for `s.task`, else -10.0.
	function reward(p::FPOMDP, s::FState, a::Int)
	    return a == DTEMP[s.task] ? 10.0 : -10.0
	end

	# Initial distribution: tasks DISH and HAND with probability 0.5 each, both starting
	# at time 0 with previous temperature 0.
	function initial_state_distribution(p::FPOMDP)
	    starts = [FState(task, 0, 0) for task in [DISH, HAND]]
	    return SparseCat(starts, [0.5, 0.5])
	end


	# NOTE(review): this solve-and-simulate section repeats the one earlier in this file,
	# so the solver and the simulation run a second time.
	# Alternatives kept for experimentation:
	# policy = RandomPolicy(p)
	# solver = POMCPSolver(c=100)
	solver = SARSOPSolver()
	policy = solve(solver, p)

	# Step through a simulation of the policy, printing a belief summary followed by the
	# state, action, reward, and observation of every step.
	for (b, s, a, r, o) in stepthrough(p, policy, "bsaro")
	    # Fraction of belief particles whose task is HAND.
	    # NOTE(review): `particles`/`n_particles` presumably need a particle-based belief;
	    # confirm the belief produced for a SARSOP policy supports them.
	    hand_count = count(q -> q.task == HAND, particles(b))
	    frac_hand = hand_count / n_particles(b)
	    @show frac_hand s a r o
	end

	# Build a D3Tree from the policy and open it in Chrome.
	# NOTE(review): confirm D3Tree accepts the policy type returned by the SARSOP solver.
	inchrome(D3Tree(policy))