Skip to content

Instantly share code, notes, and snippets.

@sir-deenicus
Created September 13, 2017 23:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sir-deenicus/4f8d8bd1e3a5b0b5b30aa8e51961945f to your computer and use it in GitHub Desktop.
Save sir-deenicus/4f8d8bd1e3a5b0b5b30aa8e51961945f to your computer and use it in GitHub Desktop.
Simple Counter-factual regret minimization example
open Prelude.Common
open System
open Prelude.Math
open Prelude
open SimpleTrees
type Node = {
regretSum : double [] ;
strategySum : double [];
}
let getStrategy weight (strategySum:_[]) regretSum =
let strategy = Array.map (max 0.) regretSum
let nSum, numactions = Array.sum strategy, strategy.Length
let strategy' =
if nSum <= 0. then
Array.create numactions (1./float numactions)
else Array.map (fun x -> x/nSum) strategy
for a in 0..numactions - 1 do strategySum.[a] <- strategySum.[a] + weight * strategy'.[a]
strategy'
let getAvgStrategy strategySum =
let nSum, numactions = Array.sum strategySum, strategySum.Length
if nSum <= 0. then
Array.create numactions (1./float numactions)
else Array.map (fun x -> x/nSum) strategySum
let newNode n = { regretSum = Array.create n 0.; strategySum = Array.create n 0.}
let rec cfr d (nodeMap:Dict<_,_>) reward (actions:_[]) (history:string) p0 p1 =
let player = history.Length % 2
let nummoves = actions.Length
let node =
match nodeMap.tryFind history with
| None -> let n = newNode nummoves in nodeMap.Add(history,n) ; n
| Some n -> n
match reward history with
| Some r -> r
| None ->
let strategy = getStrategy (if player = 0 then p0 else p1) node.strategySum node.regretSum
let util = Array.create nummoves 0.
let mutable nodeUtil = 0.
for a in 0..nummoves - 1 do
let h' = history + actions.[a]
util.[a] <- if player = 0 then
cfr (d+1) nodeMap reward actions h' (p0 * strategy.[a]) p1
else cfr (d+1) nodeMap reward actions h' p0 (p1 * strategy.[a])
nodeUtil <- nodeUtil + strategy.[a] * util.[a]
for a in 0..nummoves - 1 do
let regret = util.[a] - nodeUtil;
node.regretSum.[a] <- node.regretSum.[a] + (if player = 0 then p1 else p0) * regret
nodeUtil
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment