Skip to content

Instantly share code, notes, and snippets.

@sir-deenicus
Created September 13, 2017 05:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sir-deenicus/2ac6a2a064f4a82e09d81e60156d8915 to your computer and use it in GitHub Desktop.
Save sir-deenicus/2ac6a2a064f4a82e09d81e60156d8915 to your computer and use it in GitHub Desktop.
Regret Minimization on Least Unique Integer Game
Attempt at Nash
GUESS STRATEGY
Move | Move Probability
---- | ----------------
0 | 0.501
1 | 0.251
2 | 0.125
3 | 0.063
4 | 0.03
5 | 0.03
GUESS STRATEGY
Move | Move Probability
---- | ----------------
0 | 0.501
1 | 0.251
2 | 0.125
3 | 0.063
4 | 0.03
5 | 0.03
GUESS STRATEGY
Move | Move Probability
---- | ----------------
0 | 0.501
1 | 0.251
2 | 0.125
3 | 0.063
4 | 0.03
5 | 0.03
=====================
Regret Matching on each other`s actions
GUESS STRATEGY
Move | Move Probability
---- | ----------------
0 | 1
1 | 0
2 | 0
3 | 0
4 | 0
5 | 0
GUESS STRATEGY
Move | Move Probability
---- | ----------------
0 | 0.6
1 | 0.4
2 | 0
3 | 0
4 | 0
5 | 0
GUESS STRATEGY
Move | Move Probability
---- | ----------------
0 | 0
1 | 0.333
2 | 0.333
3 | 0.333
4 | 0
5 | 0
=========
Winrate of Strategy 1: 0.2
open System
let random = new Random()
let joinpair f (a,b) = f a b
let keepLeft f (x,y) = x, f y
let flip f a b = f b a
module Array =
let indexed a = Array.mapi (fun i x -> (i,x)) a
let selectColumn c a =
Array.map (indexed >> Array.filter (fst >> (=) c) >> Array.map snd) a
let inline normalizeWeights (a: ('a * 'b) []) =
let tot = Array.sumBy snd a
Array.map (keepLeft (flip (/) tot)) a
module String =
let pad padlen (s:string) = s + String.replicate (max 0 (padlen - s.Length)) " "
let inline toupper (str:string) = str.ToUpper()
let inline joinToStringWith sep (s:'a seq) = String.Join(sep, s)
let buildTableRow (collens:_[]) (row:string[]) =
row |> Array.mapi (fun i s -> String.pad collens.[i] s) |> joinToStringWith " | "
let makeTable newline headers title (table:string[][]) =
let hlen = Array.map String.length headers
let lens = table |> Array.map (Array.map (String.length))
let longest = [|for c in 0..headers.Length - 1 -> max hlen.[c] (Array.selectColumn c lens |> Array.map Seq.head |> Array.max)|]
let t0 = table |> Array.map (buildTableRow longest) |> joinToStringWith newline
let hrow = [|headers; [|for i in 0..headers.Length - 1 -> String.replicate longest.[i] "-"|]|]
|> Array.map (buildTableRow longest)
|> joinToStringWith newline
String.Format("{0}{1}{1}{2}{1}{3}", (String.toupper title), newline, hrow, t0)
let cdf p =
p |> Array.fold (fun (total, list) v ->
let cd = v + total
cd, cd::list) (0., [])
|> snd
|> Array.ofList
let getDiscreteSampleFromCDF (pcdf:float[]) =
let k, pcdlen = random.NextDouble() * pcdf.[0], pcdf.Length - 1
let rec cummProb idx = if k > pcdf.[idx] then cummProb (idx - 1) else idx
abs(cummProb pcdlen - pcdlen)
let discreteSample p = cdf p |> getDiscreteSampleFromCDF
let round (n:int) (x:float) = Math.Round(x,n)
let getStrategy (strategySum:_[]) regretSum =
let strategy = Array.map (max 0.) regretSum
let nSum, numactions = Array.sum strategy, strategy.Length
let strategy' =
if nSum <= 0. then
Array.create numactions (1./float numactions)
else Array.map (fun x -> x/nSum) strategy
for a in 0..numactions - 1 do strategySum.[a] <- strategySum.[a] + strategy'.[a]
strategy'
let getAvgStrategy strategySum =
let nSum, numactions = Array.sum strategySum, strategySum.Length
if nSum <= 0. then
Array.create numactions (1./float numactions)
else Array.map (fun x -> x/nSum) strategySum
let getAction (actions:_[]) strategy = actions.[discreteSample strategy]
let inline printStrategy2 title n strat =
let rows = Array.map (fun (move,p) -> [|string move; string(round n p)|]) strat
let astable = makeTable "\n" [|"Move";"Move Probability"|] title rows
printfn "\n%s" astable
let inline printStrategy title strat = printStrategy2 title 3 strat
//////////////////////////////
let rateOutcomeForFirst (a,b,c) =
if a = b && a = c && b = c then 0.
elif a < b && a < c then 2.
elif a <> b && b = c then 2.
else -1.
let trainstep iters util actions stratsList =
for _ in 0..iters - 1 do
let strats = Array.map (fun (s,r) -> getStrategy s r) stratsList
let acts = Array.map (getAction actions) strats
let heroutil = util (acts.[0],acts.[1],acts.[2])
let otherutil = util (acts.[1],acts.[0],acts.[2])
let otherutil2 = util (acts.[2],acts.[0],acts.[1])
let utils = [|heroutil;otherutil;otherutil2|]
let jointRegretSum = Array.map snd stratsList
Array.iteri (fun a action ->
let altutil:float = util (action,acts.[1],acts.[2])
let altutil2 = util (action,acts.[0],acts.[2])
let altutil3 = util (action,acts.[0],acts.[1])
let altutils = [|altutil;altutil2;altutil3|]
for i in 0..2 do
jointRegretSum.[i].[a] <- jointRegretSum.[i].[a] + (altutils.[i] - utils.[i])) actions
let actions = [|0..5|]
let createStrats = Array.init 3 (fun _ -> Array.create actions.Length 0., Array.create actions.Length 0.)
let createStrats2 = Array.create 3 (Array.create actions.Length 0., Array.create actions.Length 0.)
trainstep 50_000 rateOutcomeForFirst actions createStrats
trainstep 20_000 rateOutcomeForFirst actions createStrats2
let naiveGuesser = [|0.7;0.15;0.1;0.0;0.0;0.05|]
printfn "\nAttempt at Nash"
Array.iter (fun (s,_) -> Array.zip actions (getAvgStrategy s) |> printStrategy "Guess Strategy") createStrats2
printfn "====================="
printfn "\nRegret Matching on each other`s actions"
Array.iter (fun (s,_) -> Array.zip actions (getAvgStrategy s) |> printStrategy "Guess Strategy") createStrats
let arrayToTriple (x:_[]) = x.[0],x.[1],x.[2]
let swap3b (a,b,c) = b,a,c
let swap3c (a,b,c) = c,a,b
let runRound () = createStrats2
|> Array.mapi (fun i (s,_) ->
if i > 0 then discreteSample naiveGuesser // <== CHANGE the 0 to a 1 or 3 or 2
else (actions, (getAvgStrategy s)) ||> getAction )
|> arrayToTriple
|> id // ** <== CHANGE to swap3b**
|> rateOutcomeForFirst
[|for _ in 0..999 -> runRound()|]
|> Array.groupBy id
|> Array.map (keepLeft (Array.length>>float))
|> Array.normalizeWeights
|> Array.sumBy (joinpair ( * ))
|> round 2
|> printfn "\n=========\nWinrate of Strategy 1: %A"
@sir-deenicus
Copy link
Author

sir-deenicus commented Sep 13, 2017

Attempt at Nash

GUESS STRATEGY

Move Move Probability
0 0.501
1 0.251
2 0.125
3 0.063
4 0.03
5 0.03

GUESS STRATEGY

Move Move Probability
0 0.501
1 0.251
2 0.125
3 0.063
4 0.03
5 0.03

GUESS STRATEGY

Move Move Probability
0 0.501
1 0.251
2 0.125
3 0.063
4 0.03
5 0.03

Regret Matching on each other`s actions

GUESS STRATEGY

Move Move Probability
0 1
1 0
2 0
3 0
4 0
5 0

GUESS STRATEGY

Move Move Probability
0 0.6
1 0.4
2 0
3 0
4 0
5 0

GUESS STRATEGY

Move Move Probability
0 0
1 0.333
2 0.333
3 0.333
4 0
5 0

=========

Winrate of Strategy 1: 0.2

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment