Created
March 10, 2019 19:04
-
-
Save athas/9ed3acc3c9a439ff94a5d4118bdbb605 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Per-player regret-matching (CFR) state: one entry per action in the
-- global 'actions' table.  'strategy_sum' accumulates the mixed
-- strategies played so far; 'regret_sum' accumulates per-action regret.
type Agent = { strategy_sum: []f32
             , regret_sum: []f32
             }
-- Colonel Blotto problem size: number of battlefields and the number of
-- soldiers each player distributes over them.
type Setting = {num_fields: i32,
                num_soldiers: i32
               }
-- Three-way comparison: 1 if x > y, -1 if x < y, 0 if equal.
let compare (x: i32) (y: i32) =
  i32.sgn (x-y)
-- Blotto payoff for the first player: fields won minus fields lost,
-- comparing soldier counts field by field.
let utility (action_one, action_two): f32 =
  map2 compare action_one action_two
  |> i32.sum |> r32
-- FIXME: this initialisation is completely wrong, so I hardcode the
-- init generated by F# for the small problem.
-- Intended: enumerate all ways to split d.num_soldiers soldiers over
-- d.num_fields fields (C(n+k-1, k-1) rows); this formula does not do that.
let init (d: Setting): [][]i32 =
  tabulate_2d (d.num_soldiers*(d.num_soldiers+1)/2) d.num_fields
              (\i j -> (i*j)%d.num_soldiers)
-- let actions = init {num_fields = 3, num_soldiers = 5}
-- All 21 ways of distributing 5 soldiers over 3 battlefields,
-- hardcoded because 'init' above is broken (see its FIXME).
let actions = [[0, 0, 5], [0, 1, 4], [0, 2, 3], [0, 3, 2], [0, 4, 1],
               [0, 5, 0], [1, 0, 4], [1, 1, 3], [1, 2, 2], [1, 3, 1],
               [1, 4, 0], [2, 0, 3], [2, 1, 2], [2, 2, 1], [2, 3, 0],
               [3, 0, 2], [3, 1, 1], [3, 2, 0], [4, 0, 1], [4, 1, 0],
               [5, 0, 0]]
-- Turn regrets into a probability distribution: clip negatives to zero
-- and scale to sum 1.  If no entry is positive, fall back to uniform.
-- (Fix: the uniform fallback used the global 'length actions'; use the
-- input's own length so the helper is self-contained.  In this program
-- the two lengths coincide, so behaviour is unchanged.)
let normalize (array: []f32) =
  let temp = map (f32.max 0.0) array
  let normalizingSum = f32.sum temp
  in if normalizingSum > 0.0 then map (\x -> x / normalizingSum) temp
     else replicate (length temp) (1.0 / r32 (length temp))
-- Elementwise accumulation: add a strategy profile into a running sum.
let add_sum (sum: []f32) (x: []f32) = map2 (+) sum x
-- Add 'f a' to the accumulated regret of every action 'a' in 'actions'.
let add_regret (sum: []f32) (f: []i32 -> f32) = map2 (\s x -> s + f x) sum actions
import "lib/github.com/diku-dk/cpprandom/random"

-- Uniform f32 sampling on top of the xorshift128+ generator from cpprandom.
module rng = xorshift128plus
module dist = uniform_real_distribution f32 rng
type rng = rng.rng
-- Inverse-transform sampling: draw r uniformly from [0,1) and pick the
-- first action whose cumulative probability covers r.  Returns the new
-- RNG state and a copy of the chosen action row.
let sample (dist: []f32) (rng: rng): (rng, []i32) =
  let (rng, r) = dist.rand (0,1) rng
  let cumm_probs = scan (+) 0 dist
  -- Reduce to the smallest flagged index; neutral element is out of range.
  let (_,a) = map (\(p,i) -> (r<=p, i)) (zip cumm_probs (iota (length dist)))
              |> reduce_comm (\(aok, ai) (bok, bi) ->
                                if aok && bok then if ai < bi then (aok, ai)
                                                   else (bok, bi)
                                else if aok then (aok, ai)
                                else (bok, bi))
                             (false, length dist)
  -- Fix: if floating-point rounding leaves the final cumulative sum just
  -- below r, no flag fires and 'a' is 'length dist', which would index
  -- past the end of 'actions'.  Clamp to the last valid index.
  let a = i32.min a (length dist - 1)
  in (rng, copy actions[a])
-- One regret-matching step: derive the current mixed strategy from the
-- accumulated regrets, fold it into the strategy sum, and sample an
-- action from it.  Returns the new RNG state, updated agent, and action.
let sample_and_update (player: Agent) (rng: rng): (rng, Agent, []i32) =
  let action_distribution = normalize player.regret_sum
  let player =
    player with strategy_sum = add_sum player.strategy_sum action_distribution
  let (rng, res) = sample action_distribution rng
  in (rng, player, res)
-- Accumulate counterfactual regret for the agent that played
-- 'action_one' against 'action_two': for every alternative action,
-- add how much better it would have scored than the action played.
let update_regret (one: Agent) (action_one: []i32, action_two: []i32) =
  let self_utility = utility (action_one, action_two)
  in one with regret_sum = add_regret one.regret_sum
                                      (\a -> utility (a, action_two) - self_utility)
-- Play one self-play round between two agents and update both.
-- Fix: 'update_regret' treats the FIRST tuple element as the updated
-- agent's own action, but player two was passed (action_one, action_two),
-- i.e. its regrets were computed as if it had played player one's action.
-- Swap the arguments for player two.
let vs (one: Agent, two: Agent) (rng: rng) =
  let (rng, one, action_one) = sample_and_update one rng
  let (rng, two, action_two) = sample_and_update two rng
  let one = update_regret one (action_one, action_two)
  let two = update_regret two (action_two, action_one)
  in (rng, (one, two))
-- Run 'iterations' rounds of self-play, threading the RNG state through.
-- 'f' reorders the tuple because 'vs' returns (rng, agents) but
-- 'iterate' folds over (agents, rng).
let train agents iterations rng =
  let f (agents, rng) = let (rng, agents) = vs agents rng
                        in (agents, rng)
  in iterate iterations f (agents, rng)
-- A fresh agent: zero accumulated strategy and zero regret per action.
let player: Agent = { regret_sum = replicate (length actions) 0
                    , strategy_sum = replicate (length actions) 0 }
-- ==
-- compiled input { 10000000 }

-- Entry point: train two identical fresh agents against each other for
-- the given number of iterations from a fixed seed.
let main (iterations: i32) =
  train (player, player) iterations (rng.rng_from_seed [1,2,3])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment