Last active
August 22, 2017 00:53
-
-
Save krishnanraman/a151cb01db72ebaee293c379c8217173 to your computer and use it in GitHub Desktop.
Epsilon Greedy explore-exploit strategy. Always beats random!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
object epsilongreedy extends App { | |
// cmd-line inits | |
print("Run 10,000 trials with Arm Probabilities (eg. 0.3,0.7): ") | |
val s = readLine | |
val armprob:List[Double] = s.split(",").toList.map{ x => x.toDouble } | |
val arms = armprob.size | |
val trials = 10000 | |
val epsAll = List(0.01,0.1,0.5,0.9,0.99).map{ eps => Eps(eps, arms)} | |
val bestArm:Int = armprob.zipWithIndex.maxBy{ x=> x._1}._2 | |
def reward(arm:Int):Int = if (math.random < armprob(arm)) 1 else 0 | |
def randomArm:Int = (math.random*arms).toInt | |
def randomReward:Int = reward(randomArm) | |
def maxReward:Int = reward(bestArm) | |
println("\n\nMax\tRandom\t1%\t10%\t50%\t90%\t99%") | |
val pulls:List[List[Int]] = (1 to trials).toList.map { t => | |
maxReward :: randomReward :: epsAll.map{ x=> x.reward } | |
} | |
(0 until pulls.head.size).foreach{ column => | |
printf("%d\t", pulls.map{ row => row(column)}.sum) | |
} | |
println | |
} | |
case class Eps(eps:Double, arms:Int) { | |
// initialize reward & arm by 1 instead of 0, to avoid a 0/0 problem when finding bestArm | |
val egRewards = Array.fill[Int](arms)(1) // reward yielded by each arm so far | |
val egArms = Array.fill[Int](arms)(1) // the number of times each arm was pulled so far | |
var bestArm:Int = epsilongreedy.randomArm // starting value of the best is random | |
def updateBest(arm:Int, reward:Int):Unit = { | |
egArms(arm) = egArms(arm) + 1 // update the arm pull | |
egRewards(arm) = egRewards(arm) + reward // update the reward for the arm | |
bestArm = egRewards.zip(egArms) // marry reward to arm pull | |
.map{ x=> x._1.toDouble/x._2 } // divide reward by number of times the arm was pulled, no 0/0 issues | |
.zipWithIndex // marry reward to its index | |
.maxBy{ x=> x._1 } // max reward | |
._2 // index of max reward | |
} | |
// epsilon-greedy algorithm | |
def reward:Int = { | |
val (myarm,myreward) = if (math.random < eps) { | |
// explore | |
val a = epsilongreedy.randomArm | |
val r = epsilongreedy.reward(a) | |
(a,r) | |
} else { | |
// exploit | |
val a = bestArm | |
val r = epsilongreedy.reward(a) | |
(a,r) | |
} | |
updateBest(myarm,myreward) // THIS IS THE CRUX OF THE ALGO | |
myreward | |
} | |
} | |
/* | |
$ scala epsilongreedy | |
Run 10,000 trials with Arm Probabilities (eg. 0.3,0.7): 0.3,0.7 | |
Max Random 1% 10% 50% 90% 99% | |
7063 5017 7030 6692 6005 5116 5080 | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment