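-- Regret matching via repeated self-play for a small Blotto-style game:
-- an action distributes 5 soldiers over 3 fields, and a field is won by
-- whichever player assigns more soldiers to it.
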
type Agent = { strategy_sum: []f32
             , regret_sum: []f32
             }

type Setting = { num_fields: i32
               , num_soldiers: i32
               }

let compare (x: i32) (y: i32) =
  i32.sgn (x-y)
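
-- Payoff of one allocation against another: +1 for every field won,
-- -1 for every field lost, 0 for a tie, summed over all fields.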
let utility (action_one, action_two): f32 =
  map2 compare action_one action_two
  |> i32.sum |> r32

-- FIXME: this initialisation is completely wrong, so I hardcode the
-- init generated by F# for the small problem.
let init (d: Setting): [][]i32 =
  tabulate_2d (d.num_soldiers*(d.num_soldiers+1)/2) d.num_fields
              (\i j -> (i*j) % d.num_soldiers)

-- let actions = init {num_fields = 3, num_soldiers = 5}
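
-- Hardcoded action space: all 21 ways of distributing 5 soldiers over 3 fields.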
let actions = [[0, 0, 5], [0, 1, 4], [0, 2, 3], [0, 3, 2], [0, 4, 1],
               [0, 5, 0], [1, 0, 4], [1, 1, 3], [1, 2, 2], [1, 3, 1],
               [1, 4, 0], [2, 0, 3], [2, 1, 2], [2, 2, 1], [2, 3, 0],
               [3, 0, 2], [3, 1, 1], [3, 2, 0], [4, 0, 1], [4, 1, 0],
               [5, 0, 0]]
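
-- Regret matching: clip negative regrets to zero and normalise the result
-- into a probability distribution, falling back to the uniform
-- distribution when no regret is positive.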
let normalize (array: []f32) =
  let temp = map (f32.max 0.0) array
  let normalizingSum = f32.sum temp
  in if normalizingSum > 0.0 then map (\x -> x / normalizingSum) temp
     else replicate (length temp) (1.0 / r32 (length actions))

let add_sum (sum: []f32) (x: []f32) = map2 (+) sum x

let add_regret (sum: []f32) (f: []i32 -> f32) =
  map2 (\s x -> s + f x) sum actions

import "lib/github.com/diku-dk/cpprandom/random"
module rng = xorshift128plus
module dist = uniform_real_distribution f32 rng
type rng = rng.rng
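
-- Inverse-transform sampling of an action index: draw a uniform r,
-- prefix-sum the probabilities, and pick the smallest index whose
-- cumulative probability reaches r (found with a commutative reduction).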
let sample (dist: []f32) (rng: rng): (rng, []i32) =
  let (rng, r) = dist.rand (0,1) rng
  let cumm_probs = scan (+) 0 dist
  let (_, a) = map (\(p,i) -> (r <= p, i)) (zip cumm_probs (iota (length dist)))
               |> reduce_comm (\(aok, ai) (bok, bi) ->
                                 if aok && bok
                                 then if ai < bi then (aok, ai) else (bok, bi)
                                 else if aok then (aok, ai)
                                 else (bok, bi))
                              (false, length dist)
  in (rng, copy actions[a])
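
-- Derive the current strategy from the accumulated regrets, add it to the
-- running strategy_sum, and sample a concrete action from it.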
let sample_and_update (player: Agent) (rng: rng): (rng, Agent, []i32) =
  let action_distribution = normalize player.regret_sum
  let player =
    player with strategy_sum = add_sum player.strategy_sum action_distribution
  let (rng, res) = sample action_distribution rng
  in (rng, player, res)
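
-- Regret update: for every possible action, accumulate how much better it
-- would have scored against the opponent's move than the action that was
-- actually played.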
let update_regret (one: Agent) (action_one: []i32, action_two: []i32) =
  let self_utility = utility (action_one, action_two)
  in one with regret_sum = add_regret one.regret_sum
                                      (\a -> utility (a, action_two) - self_utility)
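
-- One round of self-play: both agents sample from their current strategy,
-- then both update their regrets against the opponent's choice.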
let vs (one: Agent, two: Agent) (rng: rng) =
  let (rng, one, action_one) = sample_and_update one rng
  let (rng, two, action_two) = sample_and_update two rng
  let one = update_regret one (action_one, action_two)
  let two = update_regret two (action_one, action_two)
  in (rng, (one, two))
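
-- Run the given number of self-play rounds, threading the RNG state
-- through each round.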
let train agents iterations rng =
  let f (agents, rng) = let (rng, agents) = vs agents rng
                        in (agents, rng)
  in iterate iterations f (agents, rng)

let player: Agent = { regret_sum = replicate (length actions) 0
                    , strategy_sum = replicate (length actions) 0 }

-- ==
-- compiled input { 10000000 }
let main (iterations: i32) =
  train (player, player) iterations (rng.rng_from_seed [1,2,3])
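
-- Not part of the original gist: a minimal sketch of how the average
-- strategy (the quantity that regret matching drives towards equilibrium)
-- could be read back out of a trained agent's accumulated strategy_sum.
-- The name 'average_strategy' is made up for illustration.
let average_strategy (agent: Agent): []f32 =
  normalize agent.strategy_sum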