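-- Regret matching via repeated self-play for a small Blotto-style game:
-- an action distributes 5 soldiers over 3 fields, and a field is won by
-- whichever player assigns more soldiers to it.
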
type Agent = { strategy_sum: []f32
             , regret_sum: []f32
             }

type Setting = { num_fields: i32
               , num_soldiers: i32
               }

let compare (x: i32) (y: i32) =
  i32.sgn (x-y)
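
-- Payoff of one allocation against another: +1 for every field won,
-- -1 for every field lost, 0 for a tie, summed over all fields.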
let utility (action_one, action_two): f32 =
  map2 compare action_one action_two
  |> i32.sum |> r32

-- FIXME: this initialisation is completely wrong, so I hardcode the
-- init generated by F# for the small problem.
let init (d: Setting): [][]i32 =
  tabulate_2d (d.num_soldiers*(d.num_soldiers+1)/2) d.num_fields
              (\i j -> (i*j) % d.num_soldiers)

-- let actions = init {num_fields = 3, num_soldiers = 5}
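
-- Hardcoded action space: all 21 ways of distributing 5 soldiers over 3 fields.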
let actions = [[0, 0, 5], [0, 1, 4], [0, 2, 3], [0, 3, 2], [0, 4, 1],
               [0, 5, 0], [1, 0, 4], [1, 1, 3], [1, 2, 2], [1, 3, 1],
               [1, 4, 0], [2, 0, 3], [2, 1, 2], [2, 2, 1], [2, 3, 0],
               [3, 0, 2], [3, 1, 1], [3, 2, 0], [4, 0, 1], [4, 1, 0],
               [5, 0, 0]]
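
-- Regret matching: clip negative regrets to zero and normalise the result
-- into a probability distribution, falling back to the uniform
-- distribution when no regret is positive.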
let normalize (array: []f32) =
  let temp = map (f32.max 0.0) array
  let normalizingSum = f32.sum temp
  in if normalizingSum > 0.0 then map (\x -> x / normalizingSum) temp
     else replicate (length temp) (1.0 / r32 (length actions))

let add_sum (sum: []f32) (x: []f32) = map2 (+) sum x

let add_regret (sum: []f32) (f: []i32 -> f32) =
  map2 (\s x -> s + f x) sum actions

import "lib/github.com/diku-dk/cpprandom/random"
module rng = xorshift128plus
module dist = uniform_real_distribution f32 rng
type rng = rng.rng
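
-- Inverse-transform sampling of an action index: draw a uniform r,
-- prefix-sum the probabilities, and pick the smallest index whose
-- cumulative probability reaches r (found with a commutative reduction).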
let sample (dist: []f32) (rng: rng): (rng, []i32) =
  let (rng, r) = dist.rand (0,1) rng
  let cumm_probs = scan (+) 0 dist
  let (_, a) = map (\(p,i) -> (r <= p, i)) (zip cumm_probs (iota (length dist)))
               |> reduce_comm (\(aok, ai) (bok, bi) ->
                                 if aok && bok
                                 then if ai < bi then (aok, ai) else (bok, bi)
                                 else if aok then (aok, ai)
                                 else (bok, bi))
                              (false, length dist)
  in (rng, copy actions[a])
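
-- Derive the current strategy from the accumulated regrets, add it to the
-- running strategy_sum, and sample a concrete action from it.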
let sample_and_update (player: Agent) (rng: rng): (rng, Agent, []i32) =
  let action_distribution = normalize player.regret_sum
  let player =
    player with strategy_sum = add_sum player.strategy_sum action_distribution
  let (rng, res) = sample action_distribution rng
  in (rng, player, res)
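
-- Regret update: for every possible action, accumulate how much better it
-- would have scored against the opponent's move than the action that was
-- actually played.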
let update_regret (one: Agent) (action_one: []i32, action_two: []i32) =
  let self_utility = utility (action_one, action_two)
  in one with regret_sum = add_regret one.regret_sum
                                      (\a -> utility (a, action_two) - self_utility)
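
-- One round of self-play: both agents sample from their current strategy,
-- then both update their regrets against the opponent's choice.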
let vs (one: Agent, two: Agent) (rng: rng) =
  let (rng, one, action_one) = sample_and_update one rng
  let (rng, two, action_two) = sample_and_update two rng
  let one = update_regret one (action_one, action_two)
  let two = update_regret two (action_one, action_two)
  in (rng, (one, two))
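
-- Run the given number of self-play rounds, threading the RNG state
-- through each round.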
let train agents iterations rng =
  let f (agents, rng) = let (rng, agents) = vs agents rng
                        in (agents, rng)
  in iterate iterations f (agents, rng)

let player: Agent = { regret_sum = replicate (length actions) 0
                    , strategy_sum = replicate (length actions) 0 }

-- ==
-- compiled input { 10000000 }
let main (iterations: i32) =
  train (player, player) iterations (rng.rng_from_seed [1,2,3])
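
-- Not part of the original gist: a minimal sketch of how the average
-- strategy (the quantity that regret matching drives towards equilibrium)
-- could be read back out of a trained agent's accumulated strategy_sum.
-- The name 'average_strategy' is made up for illustration.
let average_strategy (agent: Agent): []f32 =
  normalize agent.strategy_sum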