Skip to content

Instantly share code, notes, and snippets.

@sir-deenicus sir-deenicus/rps output
Last active Sep 13, 2017

Embed
What would you like to do?
Regret Minimization. Nash Equilibrium for Rock Paper Scissors
STRATEGY VS STATIC ADVERSARY
Move | Move Probability
-------- | ----------------
Rock | 0
Paper | 1
Scissors | 0
STRATEGY VS STATIC ADVERSARY
Player 1 | Player 2 | Prob 1 | Prob 2 | Joint Prob | Player 1 Util | Player 2 Util
-------- | -------- | ------ | ------ | ---------- | ------------- | -------------
Paper | Rock | 1 | 0.7 | 0.7 | 0.7 | -0.7
Paper | Scissors | 1 | 0.2 | 0.2 | -0.2 | 0.2
Player1: 0.5 | Player2 : -0.5
STATIC ADVERSARY2 VS STATIC ADVERSARY1
Player 1 | Player 2 | Prob 1 | Prob 2 | Joint Prob | Player 1 Util | Player 2 Util
-------- | -------- | ------ | ------ | ---------- | ------------- | -------------
Rock | Paper | 0.2 | 0.1 | 0.02 | -0.02 | 0.02
Rock | Scissors | 0.2 | 0.2 | 0.04 | 0.04 | -0.04
Paper | Rock | 0.7 | 0.7 | 0.49 | 0.49 | -0.49
Paper | Scissors | 0.7 | 0.2 | 0.14 | -0.14 | 0.14
Scissors | Rock | 0.1 | 0.7 | 0.07 | -0.07 | 0.07
Scissors | Paper | 0.1 | 0.1 | 0.01 | 0.01 | -0.01
Player1: 0.31 | Player2 : -0.31
STATIC ADVERSARY1 VS MIXED STRATEGY
Player 1 | Player 2 | Prob 1 | Prob 2 | Joint Prob | Player 1 Util | Player 2 Util
-------- | -------- | ------ | ------ | ---------- | ------------- | -------------
Rock | Paper | 0.2 | 0.333 | 0.067 | -0.067 | 0.067
Rock | Scissors | 0.2 | 0.333 | 0.067 | 0.067 | -0.067
Paper | Rock | 0.7 | 0.333 | 0.233 | 0.233 | -0.233
Paper | Scissors | 0.7 | 0.333 | 0.233 | -0.233 | 0.233
Scissors | Rock | 0.1 | 0.333 | 0.033 | -0.033 | 0.033
Scissors | Paper | 0.1 | 0.333 | 0.033 | 0.033 | -0.033
Player1: 0.0 | Player2 : 0.0
Strategy vs Adversary 100 rounds of 5$/rounds
Player1 wins: 210.0 | Player2 wins: -210.0
Player1=Mixed Strategy vs Adversary 100 rounds of 5$/rounds
Player1 wins: 5.0 | Player2 wins: -5.0
[πŸ“°, 100%] vs [πŸ‘Š,70% ;πŸ“°, 10%; βœ‚, 20%]=249.425
[πŸ‘Š,70% ;πŸ“°, 10%; βœ‚, 20%] vs [πŸ‘Š,1/3 ;πŸ“°, 1/3; βœ‚, 1/3 ] = 0.175
open System
let random = new Random()
module Array =
let indexed a = Array.mapi (fun i x -> (i,x)) a
let selectColumn c a =
Array.map (indexed >> Array.filter (fst >> (=) c) >> Array.map snd) a
module String =
let pad padlen (s:string) = s + String.replicate (max 0 (padlen - s.Length)) " "
let inline toupper (str:string) = str.ToUpper()
let inline joinToStringWith sep (s:'a seq) = String.Join(sep, s)
let buildTableRow (collens:_[]) (row:string[]) =
row |> Array.mapi (fun i s -> String.pad collens.[i] s) |> joinToStringWith " | "
let makeTable newline headers title (table:string[][]) =
let hlen = Array.map String.length headers
let lens = table |> Array.map (Array.map (String.length))
let longest = [|for c in 0..headers.Length - 1 -> max hlen.[c] (Array.selectColumn c lens |> Array.map Seq.head |> Array.max)|]
let t0 = table |> Array.map (buildTableRow longest) |> joinToStringWith newline
let hrow = [|headers; [|for i in 0..headers.Length - 1 -> String.replicate longest.[i] "-"|]|]
|> Array.map (buildTableRow longest)
|> joinToStringWith newline
String.Format("{0}{1}{1}{2}{1}{3}", (String.toupper title), newline, hrow, t0)
let cdf p =
p |> Array.fold (fun (total, list) v ->
let cd = v + total
cd, cd::list) (0., [])
|> snd
|> Array.ofList
let getDiscreteSampleFromCDF (pcdf:float[]) =
let k, pcdlen = random.NextDouble() * pcdf.[0], pcdf.Length - 1
let rec cummProb idx = if k > pcdf.[idx] then cummProb (idx - 1) else idx
abs(cummProb pcdlen - pcdlen)
let discreteSample p = cdf p |> getDiscreteSampleFromCDF
let inline pairop op (x,y) (u,v) = (op x u, op y v)
let round (n:int) (x:float) = Math.Round(x,n)
let selectColumn c a = Array.map (Array.indexed >> Array.filter (fst >> (=) c) >> Array.map snd) a
let getStrategy (strategySum:_[]) regretSum =
let strategy = Array.map (max 0.) regretSum
let nSum, numactions = Array.sum strategy, strategy.Length
let strategy' =
if nSum <= 0. then
Array.create numactions (1./float numactions)
else Array.map (fun x -> x/nSum) strategy
for a in 0..numactions - 1 do strategySum.[a] <- strategySum.[a] + strategy'.[a]
strategy'
let getAvgStrategy strategySum =
let nSum, numactions = Array.sum strategySum, strategySum.Length
if nSum <= 0. then
Array.create numactions (1./float numactions)
else Array.map (fun x -> x/nSum) strategySum
let getAction (actions:_[]) strategy = actions.[discreteSample strategy]
let train1 iters f actions actionDistr regretSum strategySum =
for _ in 0..iters - 1 do
let strategy = getStrategy strategySum regretSum
let otheraction = getAction actions actionDistr
let heroaction = getAction actions strategy
let heroutil,_ = f (heroaction,otheraction)
Array.iteri (fun a action ->
let altutil, _ = f (action,otheraction)
regretSum.[a] <- regretSum.[a] + (altutil - heroutil)) actions
let inline displayTable2 title roundTableTo roundProbTo keepZeroes winner strat1 strat2 =
let buildTable player =
[|for (move,p) in strat1 do
for (move2,p2) in strat2 ->
[|string move; string move2; string (round roundTableTo p) ;
string (round roundTableTo p2); string (round roundTableTo (p * p2)); ""; ""|] ,
(player (winner (move,move2)) * (p * p2) ,
player (winner (move2,move)) * (p * p2))|]
let table = buildTable fst
let tabledisp = Array.filter (fun (_,(p,_)) -> keepZeroes || round roundProbTo p <> 0.) table
|> Array.map (fun (row,(u,u2)) -> row.[row.Length - 2] <- string (round roundTableTo u)
row.[row.Length - 1] <- string (round roundTableTo u2); row)
let astable = makeTable "\n" [|"Player 1";"Player 2";"Prob 1";"Prob 2";"Joint Prob";"Player 1 Util";"Player 2 Util"|] title tabledisp
printfn "%s\n\nPlayer1: %A | Player2 : %A"
astable (Array.sumBy (snd >> fst) (table) |> round roundProbTo)
(Array.sumBy (snd >> snd) (table) |> round roundProbTo)
let inline displayTable title winner strat1 strat2 = displayTable2 title 3 2 false winner strat1 strat2
let inline printStrategy2 title n strat =
let rows = Array.map (fun (move,p) -> [|string move; string(round n p)|]) strat
let astable = makeTable "\n" [|"Move";"Move Probability"|] title rows
printfn "%s" astable
let inline printStrategy title strat = printStrategy2 title 3 strat
let simulatePlay dodisp rounds bet winner strat1 strat2 =
let results =
[|for _ in 0..rounds - 1 ->
let move1 = strat1 |> Array.unzip ||> getAction
let move2 = strat2 |> Array.unzip ||> getAction
let res1, res2 = winner (move1, move2)
res1 * bet, res2 * bet|]
let p1win,p2win = Array.fold (pairop (+)) (0.,0.) results
if dodisp then printfn "Player1 wins: %A | Player2 wins: %A" p1win p2win
p1win,p2win
//////////////////////////////
type Moves = Rock | Paper | Scissors
let winner =
function
| (Rock, Rock) -> ( 0.,0. )
| (Paper, Rock) -> ( 1.,-1.)
| (Scissors, Rock) -> (-1.,1. )
| (Rock, Paper) -> (-1.,1. )
| (Paper, Paper) -> ( 0.,0. )
| (Scissors, Paper) -> ( 1.,-1.)
| (Rock, Scissors) -> ( 1.,-1.)
| (Paper, Scissors) -> (-1.,1. )
| (Scissors, Scissors) -> ( 0.,0. )
let regretSum = [|0.;0.;0.|]
let strategySum = [|0.;0.;0.|]
let rpsStratDef = Array.zip [|Rock;Paper;Scissors|] [|0.7;0.1;0.2|] //<== These numbers can be changed. Ensure sums to 1.
let rpsStratDef2 = Array.zip [|Rock;Paper;Scissors|] [|0.2;0.7;0.1|] //<== These numbers can be changed. Ensure sums to 1.
let rpsNE = Array.zip [|Rock;Paper;Scissors|] [|1./3.;1./3.;1./3.|]
// ___________
// | |
//can change this \|/
train1 20_000 winner [|Rock;Paper;Scissors|] (Array.map snd rpsStratDef) regretSum strategySum
let rpsStrat1 = Array.zip [|Rock;Paper;Scissors|] (getAvgStrategy strategySum)
printStrategy "Strategy vs Static Adversary" rpsStrat1
displayTable "Strategy vs Static Adversary" winner rpsStrat1 rpsStratDef
displayTable "Static Adversary2 vs Static Adversary1" winner rpsStratDef2 rpsStratDef
displayTable "Static Adversary1 vs Mixed Strategy" winner rpsStratDef2 rpsNE
printfn "Strategy vs Adversary 100 rounds of 5$/rounds"
simulatePlay true 100 5. winner rpsStrat1 rpsStratDef
printfn "Player1=Mixed Strategy vs Adversary 100 rounds of 5$/rounds"
simulatePlay true 100 5. winner rpsStratDef rpsNE
let playstats = [|for _ in 0..999 -> simulatePlay false 100 5. winner rpsStrat1 rpsStratDef|]
printfn "[πŸ“°, 100%%] vs [πŸ‘Š,70%% ;πŸ“°, 10%%; βœ‚, 20%%]=%A" (Array.averageBy fst playstats)
let playstatsb = [|for _ in 0..999 -> simulatePlay false 100 5. winner rpsStratDef rpsNE|]
printfn "\n[πŸ‘Š,70%% ;πŸ“°, 10%%; βœ‚, 20%%] vs [πŸ‘Š,1/3 ;πŸ“°, 1/3; βœ‚, 1/3 ] = %A" (Array.averageBy fst playstatsb)
@sir-deenicus

This comment has been minimized.

Copy link
Owner Author

commented Sep 13, 2017

STRATEGY VS STATIC ADVERSARY

Move Move Probability
Rock 0
Paper 1
Scissors 0

STRATEGY VS STATIC ADVERSARY

Player 1 Player 2 Prob 1 Prob 2 Joint Prob Player 1 Util Player 2 Util
Paper Rock 1 0.7 0.7 0.7 -0.7
Paper Scissors 1 0.2 0.2 -0.2 0.2

Player1: 0.5 | Player2 : -0.5

STATIC ADVERSARY2 VS STATIC ADVERSARY1

Player 1 Player 2 Prob 1 Prob 2 Joint Prob Player 1 Util Player 2 Util
Rock Paper 0.2 0.1 0.02 -0.02 0.02
Rock Scissors 0.2 0.2 0.04 0.04 -0.04
Paper Rock 0.7 0.7 0.49 0.49 -0.49
Paper Scissors 0.7 0.2 0.14 -0.14 0.14
Scissors Rock 0.1 0.7 0.07 -0.07 0.07
Scissors Paper 0.1 0.1 0.01 0.01 -0.01

Player1: 0.31 | Player2 : -0.31

STATIC ADVERSARY1 VS MIXED STRATEGY

Player 1 Player 2 Prob 1 Prob 2 Joint Prob Player 1 Util Player 2 Util
Rock Paper 0.2 0.333 0.067 -0.067 0.067
Rock Scissors 0.2 0.333 0.067 0.067 -0.067
Paper Rock 0.7 0.333 0.233 0.233 -0.233
Paper Scissors 0.7 0.333 0.233 -0.233 0.233
Scissors Rock 0.1 0.333 0.033 -0.033 0.033
Scissors Paper 0.1 0.333 0.033 0.033 -0.033

Player1: 0.0 | Player2 : 0.0

Strategy vs Adversary 100 rounds of $5/rounds
Player1 wins: 210.0 | Player2 wins: -210.0

Player1=Mixed Strategy vs Adversary 100 rounds of $5/rounds
Player1 wins: 5.0 | Player2 wins: -5.0

[πŸ“°, 100%] vs [✊,70% ;πŸ“°, 10%; βœ‚οΈ, 20%]=249.425

[✊,70% ;πŸ“°, 10%; βœ‚οΈ, 20%] vs [✊,1/3 ;πŸ“°, 1/3; βœ‚οΈ, 1/3 ] = 0.175

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.