Skip to content

Instantly share code, notes, and snippets.

@sir-deenicus sir-deenicus/rps2 output
Last active Sep 13, 2017

Embed
What would you like to do?
Regret Minimization on Rock, Paper, Scissors (Games with gifts and no gifts)
LEARNED STRATEGY
Move | Move Probability
---- | ----------------
R | 0
P | 0.999
S | 0
T | 0
LEARNED STRATEGY VS FIXED ADVERSARY
Player 1 | Player 2 | Prob 1 | Prob 2 | Joint Prob | Player 1 Util | Player 2 Util
-------- | -------- | ------ | ------ | ---------- | ------------- | -------------
P | R | 0.999 | 0.4 | 0.399 | 0.399 | -0.399
P | S | 0.999 | 0.1 | 0.1 | -0.1 | 0.1
P | T | 0.999 | 0.1 | 0.1 | -0.1 | 0.1
Player1: 0.2 | Player2 : -0.2
~Strategy EV (10 rounds, $5/round)
[📰,100%] vs [👊,40% ;📰, 40%; ✂, 10%; 📠,10%] = 12.0
EQUIL1
Move | Move Probability
---- | ----------------
R | 0.332
P | 0.333
S | 0.335
T | 0
EQUIL2
Move | Move Probability
---- | ----------------
R | 0.333
P | 0.335
S | 0.333
T | 0
EQUIL
Player 1 | Player 2 | Prob 1 | Prob 2 | Joint Prob | Player 1 Util | Player 2 Util
-------- | -------- | ------ | ------ | ---------- | ------------- | -------------
R | P | 0.332 | 0.335 | 0.111 | -0.111 | 0.111
R | S | 0.332 | 0.333 | 0.11 | 0.11 | -0.11
P | R | 0.333 | 0.333 | 0.111 | 0.111 | -0.111
P | S | 0.333 | 0.333 | 0.111 | -0.111 | 0.111
S | R | 0.335 | 0.333 | 0.111 | -0.111 | 0.111
S | P | 0.335 | 0.335 | 0.112 | 0.112 | -0.112
Player1: 0.0 | Player2 : 0.0
EQUIL1 VS ADV
Player 1 | Player 2 | Prob 1 | Prob 2 | Joint Prob | Player 1 Util | Player 2 Util
-------- | -------- | ------ | ------ | ---------- | ------------- | -------------
R | P | 0.334 | 0.2 | 0.067 | -0.067 | 0.067
R | S | 0.334 | 0.1 | 0.033 | 0.033 | -0.033
R | T | 0.334 | 0.4 | 0.134 | 0.134 | -0.134
P | R | 0.331 | 0.3 | 0.099 | 0.099 | -0.099
P | S | 0.331 | 0.1 | 0.033 | -0.033 | 0.033
P | T | 0.331 | 0.4 | 0.133 | -0.133 | 0.133
S | R | 0.335 | 0.3 | 0.1 | -0.1 | 0.1
S | P | 0.335 | 0.2 | 0.067 | 0.067 | -0.067
S | T | 0.335 | 0.4 | 0.134 | 0.134 | -0.134
Player1: 0.13 | Player2 : -0.13
ROCK VS EQUIL2
Player 1 | Player 2 | Prob 1 | Prob 2 | Joint Prob | Player 1 Util | Player 2 Util
-------- | -------- | ------ | ------ | ---------- | ------------- | -------------
R | P | 1 | 0.335 | 0.335 | -0.335 | 0.335
R | S | 1 | 0.333 | 0.333 | 0.333 | -0.333
Player1: 0.0 | Player2 : 0.0
open System
let random = new Random()
module Array =
let indexed a = Array.mapi (fun i x -> (i,x)) a
let selectColumn c a =
Array.map (indexed >> Array.filter (fst >> (=) c) >> Array.map snd) a
module String =
let pad padlen (s:string) = s + String.replicate (max 0 (padlen - s.Length)) " "
let inline toupper (str:string) = str.ToUpper()
let inline joinToStringWith sep (s:'a seq) = String.Join(sep, s)
let buildTableRow (collens:_[]) (row:string[]) =
row |> Array.mapi (fun i s -> String.pad collens.[i] s) |> joinToStringWith " | "
let makeTable newline headers title (table:string[][]) =
let hlen = Array.map String.length headers
let lens = table |> Array.map (Array.map (String.length))
let longest = [|for c in 0..headers.Length - 1 -> max hlen.[c] (Array.selectColumn c lens |> Array.map Seq.head |> Array.max)|]
let t0 = table |> Array.map (buildTableRow longest) |> joinToStringWith newline
let hrow = [|headers; [|for i in 0..headers.Length - 1 -> String.replicate longest.[i] "-"|]|]
|> Array.map (buildTableRow longest)
|> joinToStringWith newline
String.Format("{0}{1}{1}{2}{1}{3}", (String.toupper title), newline, hrow, t0)
let cdf p =
p |> Array.fold (fun (total, list) v ->
let cd = v + total
cd, cd::list) (0., [])
|> snd
|> Array.ofList
let getDiscreteSampleFromCDF (pcdf:float[]) =
let k, pcdlen = random.NextDouble() * pcdf.[0], pcdf.Length - 1
let rec cummProb idx = if k > pcdf.[idx] then cummProb (idx - 1) else idx
abs(cummProb pcdlen - pcdlen)
let discreteSample p = cdf p |> getDiscreteSampleFromCDF
let inline pairop op (x,y) (u,v) = (op x u, op y v)
let round (n:int) (x:float) = Math.Round(x,n)
let selectColumn c a = Array.map (Array.indexed >> Array.filter (fst >> (=) c) >> Array.map snd) a
let getStrategy (strategySum:_[]) regretSum =
let strategy = Array.map (max 0.) regretSum
let nSum, numactions = Array.sum strategy, strategy.Length
let strategy' =
if nSum <= 0. then
Array.create numactions (1./float numactions)
else Array.map (fun x -> x/nSum) strategy
for a in 0..numactions - 1 do strategySum.[a] <- strategySum.[a] + strategy'.[a]
strategy'
let getAvgStrategy strategySum =
let nSum, numactions = Array.sum strategySum, strategySum.Length
if nSum <= 0. then
Array.create numactions (1./float numactions)
else Array.map (fun x -> x/nSum) strategySum
let getAction (actions:_[]) strategy = actions.[discreteSample strategy]
let train1 iters f actions actionDistr regretSum strategySum =
for _ in 0..iters - 1 do
let strategy = getStrategy strategySum regretSum
let otheraction = getAction actions actionDistr
let heroaction = getAction actions strategy
let heroutil,_ = f (heroaction,otheraction)
Array.iteri (fun a action ->
let altutil, _ = f (action,otheraction)
regretSum.[a] <- regretSum.[a] + (altutil - heroutil)) actions
let train iters f actions regretSum regretSum2 strategySum strategySum2 =
for _ in 0..iters - 1 do
let strategy = getStrategy strategySum regretSum
let strategy2 = getStrategy strategySum2 regretSum2
let otheraction = getAction actions strategy2
let heroaction = getAction actions strategy
let heroutil,otherutil = f (heroaction,otheraction)
Array.iteri (fun a action ->
let altutil , _ = f (action,otheraction)
let altutil2, _ = f (action,heroaction)
regretSum.[a] <- regretSum.[a] + (altutil - heroutil)
regretSum2.[a] <- regretSum2.[a] + (altutil2 - otherutil)) actions
let inline displayTable2 title roundTableTo roundProbTo keepZeroes winner strat1 strat2 =
let buildTable player =
[|for (move,p) in strat1 do
for (move2,p2) in strat2 ->
[|string move; string move2; string (round roundTableTo p) ;
string (round roundTableTo p2); string (round roundTableTo (p * p2)); ""; ""|] ,
(player (winner (move,move2)) * (p * p2) ,
player (winner (move2,move)) * (p * p2))|]
let table = buildTable fst
let tabledisp = Array.filter (fun (_,(p,_)) -> keepZeroes || round roundProbTo p <> 0.) table
|> Array.map (fun (row,(u,u2)) -> row.[row.Length - 2] <- string (round roundTableTo u)
row.[row.Length - 1] <- string (round roundTableTo u2); row)
let astable = makeTable "\n" [|"Player 1";"Player 2";"Prob 1";"Prob 2";"Joint Prob";"Player 1 Util";"Player 2 Util"|] title tabledisp
printfn "\n%s\n\nPlayer1: %A | Player2 : %A"
astable (Array.sumBy (snd >> fst) (table) |> round roundProbTo)
(Array.sumBy (snd >> snd) (table) |> round roundProbTo)
let inline displayTable title winner strat1 strat2 = displayTable2 title 3 2 false winner strat1 strat2
let inline printStrategy2 title n strat =
let rows = Array.map (fun (move,p) -> [|string move; string(round n p)|]) strat
let astable = makeTable "\n" [|"Move";"Move Probability"|] title rows
printfn "\n%s" astable
let inline printStrategy title strat = printStrategy2 title 3 strat
let simulatePlay dodisp rounds bet winner strat1 strat2 =
let results =
[|for _ in 0..rounds - 1 ->
let move1 = strat1 |> Array.unzip ||> getAction
let move2 = strat2 |> Array.unzip ||> getAction
let res1, res2 = winner (move1, move2)
res1 * bet, res2 * bet|]
let p1win,p2win = Array.fold (pairop (+)) (0.,0.) results
if dodisp then printfn "Player1 wins: %A | Player2 wins: %A" p1win p2win
p1win,p2win
//////////////////////////////
let reset (a:_[]) = for i in 0..a.Length - 1 do a.[i] <- 0.
type Moves2 = R | P | S | T
//The numbers control the utilities for the game. Ensure left and right sum to 0.
let winner2 =
function
| (R,R) -> ( 0.,0. )
| (P,R) -> ( 1.,-1.)
| (S,R) -> (-1.,1. )
| (T,R) -> (1.,-1. )
| (R,P) -> (-1.,1. )
| (P,P) -> ( 0.,0. )
| (S,P) -> ( 1.,-1.)
| (T,P) -> ( 1.,-1.)
| (R,S) -> ( 1.,-1.)
| (P,S) -> (-1.,1. )
| (S,S) -> ( 0.,0. )
| (T,S) -> ( -1.,1.)
| (R,T) -> (-1.,1. )
| (P,T) -> (-1.,1. )
| (S,T) -> (1.,-1. )
| (T,T) -> (0.,0. )
//The numbers control the utilities for the game. Ensure left and right sum to 0.
let winner3 =
function
| (R,R) -> ( 0.,0. )
| (P,R) -> ( 1.,-1.)
| (S,R) -> (-1.,1. )
//This essentially flips a coin but can change the number to <= 1.
| (T,R) -> if random.NextDouble() < 0.5 then (1.,-1. ) else (-1.,1. )
| (R,P) -> (-1.,1. )
| (P,P) -> ( 0.,0. )
| (S,P) -> ( 1.,-1.)
//roll a die with sides 4 or less as wins.
| (T,P) -> if random.NextDouble() < 4./6. then (1.,-1. ) else (-1.,1. )
| (R,S) -> ( 1.,-1.)
| (P,S) -> (-1.,1. )
| (S,S) -> ( 0.,0. )
| (T,S) -> (-1.,1.)
| (R,T) -> if random.NextDouble() < 0.5 then (-1.,1. ) else (1.,-1. )
| (P,T) -> if random.NextDouble() < 4./6. then (-1.,1. ) else (1.,-1. )
| (S,T) -> (1.,-1.)
| (T,T) -> (0.,0. )
//---------------------------
let regretSumX = [|0.;0.;0.;0.|]
let strategySumX = [|0.;0.;0.;0.|]
let regretSum2X = [|0.;0.;0.;0.|]
let strategySum2X = [|0.;0.;0.;0.|]
let rpstStratDef = Array.zip [|R;P;S;T|] [|0.4;0.4;0.1;0.1|] //<== These numbers can be changed. Ensure sums to 1.
let rpstStratDef2 = Array.zip [|R;P;S;T|] [|0.3;0.2;0.1;0.4|] //<== These numbers can be changed. Ensure sums to 1.
let purePaper = Array.zip [|R;P;S;T|] [|0.;1.;0.;0.|]
let pureRock = Array.zip [|R;P;S;T|] [|1.;0.;0.;0.|]
let pureTelephone = Array.zip [|R;P;S;T|] [|0.;0.;0.;1.|]
let winnerFunction = winner3 //<== Change the name of this function to either *winner2* or *winner3* to control what is learnt
train1 200_000 winnerFunction [|R;P;S;T|] (Array.map snd rpstStratDef) regretSumX strategySumX
let rpstStrat1 = Array.zip [|R;P;S;T|] (getAvgStrategy strategySumX)
printStrategy "Learned Strategy" rpstStrat1
displayTable "Learned Strategy vs Fixed Adversary" winnerFunction rpstStrat1 rpstStratDef //note that displayTable probabilities and expected values aren't quite correct for winner3
let avg = [|for _ in 0..99 -> simulatePlay false 10 5. winnerFunction rpstStrat1 rpstStratDef|] |> Array.averageBy fst
printfn "\n~Strategy EV (10 rounds, $5/round)\n[📰,100%%] vs [👊,40%% ;📰, 40%%; ✂, 10%%; 📠,10%%] = %A" avg
//---------------------------
reset regretSumX; reset strategySumX
train 200_000 winnerFunction [|R;P;S;T|] regretSumX regretSum2X strategySumX strategySum2X
let rpstStrat1b, rpstStrat2 =
Array.zip [|R;P;S;T|] (getAvgStrategy strategySumX) ,
Array.zip [|R;P;S;T|] (getAvgStrategy strategySum2X)
printStrategy "Equil1" rpstStrat1b
printStrategy "Equil2" rpstStrat2
displayTable "Equil" winnerFunction rpstStrat1b rpstStrat2
displayTable "equil1 vs adv" winnerFunction rpstStrat1b rpstStratDef2
displayTable "rock vs equil2" winnerFunction pureRock rpstStrat2
@sir-deenicus

This comment has been minimized.

Copy link
Owner Author

commented Sep 13, 2017

LEARNED STRATEGY

Move Move Probability
R 0
P 0.999
S 0
T 0

LEARNED STRATEGY VS FIXED ADVERSARY

Player 1 Player 2 Prob 1 Prob 2 Joint Prob Player 1 Util Player 2 Util
P R 0.999 0.4 0.399 0.399 -0.399
P S 0.999 0.1 0.1 -0.1 0.1
P T 0.999 0.1 0.1 -0.1 0.1

Player1: 0.2 | Player2 : -0.2

~Strategy EV (10 rounds, $5/round)
[📰,100%] vs [,40% ;📰, 40%; ✂️, 10%; ☎️,10%] = 12.0

EQUIL1

Move Move Probability
R 0.332
P 0.333
S 0.335
T 0

EQUIL2

Move Move Probability
R 0.333
P 0.335
S 0.333
T 0

EQUIL

Player 1 Player 2 Prob 1 Prob 2 Joint Prob Player 1 Util Player 2 Util
R P 0.332 0.335 0.111 -0.111 0.111
R S 0.332 0.333 0.11 0.11 -0.11
P R 0.333 0.333 0.111 0.111 -0.111
P S 0.333 0.333 0.111 -0.111 0.111
S R 0.335 0.333 0.111 -0.111 0.111
S P 0.335 0.335 0.112 0.112 -0.112

Player1: 0.0 | Player2 : 0.0

EQUIL1 VS ADV

Player 1 Player 2 Prob 1 Prob 2 Joint Prob Player 1 Util Player 2 Util
R P 0.334 0.2 0.067 -0.067 0.067
R S 0.334 0.1 0.033 0.033 -0.033
R T 0.334 0.4 0.134 0.134 -0.134
P R 0.331 0.3 0.099 0.099 -0.099
P S 0.331 0.1 0.033 -0.033 0.033
P T 0.331 0.4 0.133 -0.133 0.133
S R 0.335 0.3 0.1 -0.1 0.1
S P 0.335 0.2 0.067 0.067 -0.067
S T 0.335 0.4 0.134 0.134 -0.134

Player1: 0.13 | Player2 : -0.13

ROCK VS EQUIL2

Player 1 Player 2 Prob 1 Prob 2 Joint Prob Player 1 Util Player 2 Util
R P 1 0.335 0.335 -0.335 0.335
R S 1 0.333 0.333 0.333 -0.333

Player1: 0.0 | Player2 : 0.0

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.