-
-
Save luxbock/da22767ef16af6ebc5dc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns cfr.rps | |
(:require [clojure.core.matrix :as m])) | |
(set! *warn-on-reflection* true) | |
(defn create-utility-fn
  "Builds a utility function from a payoff matrix. The returned function
  accepts two mixed-strategy vectors and yields the expected payoff for
  the first player."
  [payoffs]
  (fn [sa sb]
    (let [dims [(count sa) (count sb)]
          ;; Outer product of the two strategies: probability of each
          ;; pure-action outcome.
          outcome-probs (m/compute-matrix dims
                                          (fn [i j]
                                            (* (nth sa i) (nth sb j))))]
      (m/ereduce + (m/mul outcome-probs payoffs)))))
(defn regret
  "Regret for the player having played strategy `sa' instead of `sb'
  against his opponent's strategy `so', under utility function `uf'."
  [uf sa sb so]
  (let [alternative (uf sb so)
        actual (uf sa so)]
    (- alternative actual)))
(defn action-profile
  "An action profile is the list of pure strategies available to a player:
  for `n' actions, the `n' one-hot vectors of length `n'."
  ;; The original placed this string AFTER the argument vector, where it is
  ;; evaluated and discarded rather than attached as documentation.
  [n]
  (map #(m/mset (repeat n 0) % 1) (range n)))
(defn regret-profile
  "Returns the regret for every pure strategy the first player could have
  chosen (including the one actually played), given utility function `uf',
  the played strategy `sa' and the opponent strategy `so'."
  [uf sa so]
  (for [pure (action-profile (count sa))]
    (regret uf sa pure so)))
(defn normalise-strategies
  "Scales `strat' so that its entries sum to one, dividing each by `nsum'.
  Falls back to the uniform strategy when `nsum' is not positive."
  [nsum strat]
  (let [n (count strat)]
    (if (pos? nsum)
      (for [s strat] (/ s nsum))
      (repeat n (/ n)))))
(defn new-strategy
  "Derives the next strategy via regret matching: negative regrets are
  clipped to zero and the remainder is normalised into probabilities."
  [rgr]
  (let [clipped (map (fn [r] (if (pos? r) r 0)) rgr)]
    (normalise-strategies (reduce + clipped) clipped)))
(defn cumulative-probabilities
  "Takes a collection of probabilities (that sum up to one) and turns it
  into a vector of cumulative partial sums, starting at 0."
  [coll]
  ;; `reductions` yields the running sums directly; the original rebuilt the
  ;; vector with `(last %1)`, an O(n) lookup per element (O(n^2) overall).
  (vec (reductions + 0 coll)))
(defn choose-action
  "Samples a pure strategy (a one-hot vector) from the mixed strategy
  `strat' according to its probabilities."
  [strat]
  (let [thresholds (cumulative-probabilities strat)
        roll (rand)
        ;; Count the thresholds the roll clears, minus one for the leading 0.
        index (dec (count (take-while (fn [t] (> roll t)) thresholds)))]
    (m/mset (repeat (count strat) 0) index 1)))
(defn avarage-strategy
  "Normalises the per-strategy play counts in `ssum' into frequencies of
  the total. (Misspelling of `average' kept: callers depend on this name.)"
  [ssum]
  (normalise-strategies (reduce + ssum) ssum))
(defn cfrm-br
  "Given a utility function, number of iterations and a strategy for the
  opponent, performs the Counterfactual Regret Minimization algorithm to find
  the best response to the strategy in question. The number of actions is
  taken from the opponent strategy (it was previously hard-coded to three)."
  [uf n sb]
  (let [zeros (vec (repeat (count sb) 0))]
    (loop [i 0
           reg-a zeros ; cumulative regrets
           ssum zeros] ; cumulative action counts
      (if (= i n)
        (avarage-strategy ssum)
        (let [strat-a (choose-action (new-strategy reg-a))]
          (recur (inc i)
                 (m/add reg-a (regret-profile uf strat-a sb))
                 (m/add ssum strat-a)))))))
(defn cfrm-ne
  "Given a utility function and a number of iterations to perform, performs the
  Counterfactual Regret Minimization algorithm to find an approximation of the
  Nash Equilibrium for the game. The optional `k' gives the number of actions
  per player (defaults to 3, the original rock-paper-scissors setting)."
  ([uf n] (cfrm-ne uf n 3))
  ([uf n k]
   (let [zeros (vec (repeat k 0))]
     (loop [i 0
            reg-a zeros ; player A cumulative regrets
            reg-b zeros ; player B cumulative regrets
            ssum zeros] ; player A cumulative action counts
       (if (= i n)
         (avarage-strategy ssum)
         (let [strat-a (choose-action (new-strategy reg-a))
               strat-b (choose-action (new-strategy reg-b))]
           (recur (inc i)
                  (m/add reg-a (regret-profile uf strat-a strat-b))
                  (m/add reg-b (regret-profile uf strat-b strat-a))
                  (m/add ssum strat-a))))))))
(comment
  ;; Rock-paper-scissors payoff matrix for the row player.
  (def rps
    (create-utility-fn [[ 0 -1  1]
                        [ 1  0 -1]
                        [-1  1  0]]))
  ;; Approximate the Nash equilibrium (should converge toward uniform).
  (cfrm-ne rps 100000))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns cfr.rps-tweak | |
(:require [clojure.core.matrix :as m] | |
[primitive-math :as pm])) | |
(set! *warn-on-reflection* true) | |
(m/set-current-implementation :vectorz) | |
(defn create-utility-fn
  "Builds a utility function from a payoff matrix. The returned function
  accepts two mixed-strategy vectors and yields the expected (double)
  payoff for the first player."
  [m]
  (fn ^double [sa sb]
    (let [dims [(m/ecount sa) (m/ecount sb)]
          ;; Outer product of the two strategies: probability of each
          ;; pure-action outcome. Primitive multiply avoids boxing.
          outcome-probs (m/compute-matrix
                          dims
                          (fn [i j]
                            (pm/* ^double (m/mget sa i) ^double (m/mget sb j))))]
      (m/ereduce + (m/mul outcome-probs m)))))
(defn regret
  "Regret for the player having played strategy `sa' instead of `sb'
  against his opponent's strategy `so', under utility function `uf'."
  [uf sa sb so]
  (let [alternative (uf sb so)
        actual (uf sa so)]
    (pm/- ^double alternative ^double actual)))
(defn action-profile
  "An action profile is the list of pure strategies available to a player:
  for `n' actions, the `n' one-hot vectors of length `n'."
  ;; The original placed this string AFTER the argument vector, where it is
  ;; evaluated and discarded rather than attached as documentation.
  [n]
  (map #(m/mset (repeat n 0) % 1) (range n)))
(defn regret-profile
  "Returns the regret for every pure strategy the first player could have
  chosen (including the one actually played), given utility function `uf',
  the played strategy `sa' and the opponent strategy `so'."
  [uf sa so]
  (for [pure (action-profile (m/ecount sa))]
    (regret uf sa pure so)))
(defn normalise-strategies
  "Scales `strat' so that its entries sum to one, dividing each by `nsum'.
  Falls back to the uniform strategy when `nsum' is not positive."
  [nsum strat]
  (if (pm/> ^double nsum 0.0)
    (map #(pm/div ^double % ^double nsum) strat)
    ;; The fallback previously read `(pm/div (m/ecount strat))`: `m/ecount'
    ;; returns a long, and primitive division is binary — a single-argument
    ;; reciprocal either fails to resolve or truncates 1/n to 0. Use plain
    ;; double division so the uniform probability is a real 1/n.
    (let [n (m/ecount strat)]
      (repeat n (/ 1.0 n)))))
(defn new-strategy
  "Derives the next strategy via regret matching: negative regrets are
  clipped to zero and the remainder is normalised into probabilities."
  [rgr]
  (let [clipped (for [i (range (m/ecount rgr))]
                  (let [r (m/mget rgr i)]
                    (if (pos? r) r 0)))]
    (normalise-strategies (reduce + clipped) clipped)))
(defn cumulative-probabilities
  "Takes a collection of probabilities (that sum up to one) and turns it
  into a vector of cumulative partial sums, starting at 0."
  [coll]
  ;; `reductions` yields the running sums directly; the original rebuilt the
  ;; vector with `(last %1)`, an O(n) lookup per element (O(n^2) overall).
  (vec (reductions + 0 coll)))
(defn choose-action
  "Samples a pure strategy (a one-hot vector) from the mixed strategy
  `strat' according to its probabilities."
  ;; Dropped the original ^doubles hint: callers pass the lazy seq produced
  ;; by `new-strategy', never a primitive double array, so the hint was
  ;; misleading and would break any future array interop it implied.
  [strat]
  (let [cp (cumulative-probabilities strat)
        r (rand)
        index (pm/dec ^long (m/ecount (take-while #(pm/> ^double r ^double %) cp)))]
    (m/mset (repeat (m/ecount strat) 0) index 1)))
(defn avarage-strategy
  "Normalises the per-strategy play counts in `ssum' into frequencies of
  the total. (Misspelling of `average' kept: callers depend on this name.)"
  [ssum]
  (normalise-strategies (reduce + ssum) ssum))
(defn cfrm-be
  "Given a utility function, number of iterations and a strategy for the
  opponent, performs the Counterfactual Regret Minimization algorithm to find
  the best response to the strategy in question. The number of actions is
  taken from the opponent strategy (it was previously hard-coded to three)."
  [uf n sb]
  (let [n (int n)
        zeros (m/array (repeat (m/ecount sb) 0))]
    (loop [i (int 0)
           reg-a zeros ; cumulative regrets
           ssum zeros] ; cumulative action counts
      (if (pm/== i n)
        (avarage-strategy ssum)
        (let [strat-a (choose-action (new-strategy reg-a))]
          (recur (pm/inc i)
                 (m/add reg-a (regret-profile uf strat-a sb))
                 (m/add ssum strat-a)))))))
(defn cfrm-ne
  "Given a utility function and a number of iterations to perform, performs the
  Counterfactual Regret Minimization algorithm to find an approximation of the
  Nash Equilibrium for the game. The optional `k' gives the number of actions
  per player (defaults to 3, the original rock-paper-scissors setting)."
  ([uf n] (cfrm-ne uf n 3))
  ([uf n k]
   (let [n (int n)
         zeros (m/array (repeat k 0))]
     (loop [i (int 0)
            reg-a zeros ; player A cumulative regrets
            reg-b zeros ; player B cumulative regrets
            ssum zeros] ; player A cumulative action counts
       (if (pm/== i n)
         (avarage-strategy ssum)
         (let [strat-a (choose-action (new-strategy reg-a))
               strat-b (choose-action (new-strategy reg-b))]
           (recur (pm/inc i)
                  (m/add reg-a (regret-profile uf strat-a strat-b))
                  (m/add reg-b (regret-profile uf strat-b strat-a))
                  (m/add ssum strat-a))))))))
(comment
  ;; Rock-paper-scissors payoff matrix for the row player.
  (def rps
    (create-utility-fn [[ 0 -1  1]
                        [ 1  0 -1]
                        [-1  1  0]]))
  ;; Approximate the Nash equilibrium (should converge toward uniform).
  (cfrm-ne rps 100000))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.Arrays; | |
import java.util.Random; | |
/**
 * Trains a regret-matching player for rock-paper-scissors against a fixed
 * opponent strategy, using the Counterfactual Regret Minimization scheme.
 */
public class RPSTrainer {
    public static final int ROCK = 0, PAPER = 1, SCISSORS = 2, NUM_ACTIONS = 3;
    public static final Random random = new Random();

    double[] regretSum = new double[NUM_ACTIONS],    // cumulative regret per action
             strategy = new double[NUM_ACTIONS],     // current regret-matched strategy
             strategySum = new double[NUM_ACTIONS],  // strategy probabilities summed over iterations
             oppStrategy = { 0.4, 0.3, 0.3 };        // fixed opponent mixed strategy

    /**
     * Computes the current strategy by regret matching (positive regrets,
     * normalised) and accumulates it into {@code strategySum} as a side effect.
     * Falls back to the uniform strategy when no regret is positive.
     */
    private double[] getStrategy() {
        double normalizingSum = 0;
        for (int a = 0; a < NUM_ACTIONS; a++) {
            strategy[a] = Math.max(regretSum[a], 0);
            normalizingSum += strategy[a];
        }
        for (int a = 0; a < NUM_ACTIONS; a++) {
            strategy[a] = normalizingSum > 0
                    ? strategy[a] / normalizingSum
                    : 1.0 / NUM_ACTIONS;
            strategySum[a] += strategy[a];
        }
        return strategy;
    }

    /**
     * Samples an action index from the given mixed strategy by inverse
     * transform sampling on the cumulative probabilities.
     */
    public int getAction(double[] strategy) {
        double r = random.nextDouble();
        double cumulativeProbability = 0;
        int a;
        for (a = 0; a < NUM_ACTIONS - 1; a++) {
            cumulativeProbability += strategy[a];
            if (r < cumulativeProbability)
                break;
        }
        return a;
    }

    /**
     * Runs the given number of self-play iterations against the fixed
     * opponent, accumulating per-action regrets.
     */
    public void train(int iterations) {
        double[] actionUtility = new double[NUM_ACTIONS];
        for (int i = 0; i < iterations; i++) {
            double[] strategy = getStrategy();
            int myAction = getAction(strategy);
            int otherAction = getAction(oppStrategy);
            // RPS utilities relative to the opponent's action: the action
            // after it (cyclically) wins, the one before it loses.
            actionUtility[otherAction] = 0;
            actionUtility[(otherAction + 1) % NUM_ACTIONS] = 1;
            actionUtility[(otherAction + NUM_ACTIONS - 1) % NUM_ACTIONS] = -1;
            for (int a = 0; a < NUM_ACTIONS; a++)
                regretSum[a] += actionUtility[a] - actionUtility[myAction];
        }
    }

    /**
     * Returns the average strategy over all training iterations — the
     * quantity that converges under CFR. Uniform if nothing accumulated yet.
     */
    public double[] getAverageStrategy() {
        double[] avgStrategy = new double[NUM_ACTIONS];
        double normalizingSum = 0;
        for (double s : strategySum)
            normalizingSum += s;
        for (int a = 0; a < NUM_ACTIONS; a++)
            avgStrategy[a] = normalizingSum > 0
                    ? strategySum[a] / normalizingSum
                    : 1.0 / NUM_ACTIONS;
        return avgStrategy;
    }

    public static void main(String[] args) {
        RPSTrainer rpsTrainer = new RPSTrainer();
        rpsTrainer.train(1000000);
        System.out.println(Arrays.toString(rpsTrainer.getAverageStrategy()));
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment