@krishnanraman
Last active August 22, 2017 00:53
Epsilon-greedy explore-exploit strategy for the multi-armed bandit. Every epsilon tested beats the purely random policy in the sample run below.
import scala.io.StdIn.readLine // Predef.readLine is deprecated; removed in newer Scala versions

object epsilongreedy extends App {
  // cmd-line inits
  print("Run 10,000 trials with Arm Probabilities (eg. 0.3,0.7): ")
  val s = readLine
  val armprob: List[Double] = s.split(",").toList.map { x => x.toDouble }
  val arms = armprob.size
  val trials = 10000
  val epsAll = List(0.01, 0.1, 0.5, 0.9, 0.99).map { eps => Eps(eps, arms) }
  val bestArm: Int = armprob.zipWithIndex.maxBy { x => x._1 }._2

  def reward(arm: Int): Int = if (math.random < armprob(arm)) 1 else 0 // Bernoulli arm: pays 1 with probability armprob(arm)
  def randomArm: Int = (math.random * arms).toInt
  def randomReward: Int = reward(randomArm)
  def maxReward: Int = reward(bestArm)

  println("\n\nMax\tRandom\t1%\t10%\t50%\t90%\t99%")
  val pulls: List[List[Int]] = (1 to trials).toList.map { t =>
    maxReward :: randomReward :: epsAll.map { x => x.reward }
  }
  (0 until pulls.head.size).foreach { column =>
    printf("%d\t", pulls.map { row => row(column) }.sum)
  }
  println()
}
case class Eps(eps: Double, arms: Int) {
  // initialize reward & arm counts to 1 instead of 0, to avoid a 0/0 problem when finding bestArm
  val egRewards = Array.fill[Int](arms)(1) // reward yielded by each arm so far
  val egArms = Array.fill[Int](arms)(1)    // the number of times each arm was pulled so far
  var bestArm: Int = epsilongreedy.randomArm // starting value of the best arm is random

  def updateBest(arm: Int, reward: Int): Unit = {
    egArms(arm) = egArms(arm) + 1            // update the arm pull count
    egRewards(arm) = egRewards(arm) + reward // update the reward for the arm
    bestArm = egRewards.zip(egArms)          // marry reward to arm pull count
      .map { x => x._1.toDouble / x._2 }     // divide reward by number of times the arm was pulled, no 0/0 issues
      .zipWithIndex                          // marry mean reward to its index
      .maxBy { x => x._1 }                   // max mean reward
      ._2                                    // index of max mean reward
  }

  // epsilon-greedy algorithm: explore a random arm with probability eps, exploit the current best arm otherwise
  def reward: Int = {
    val (myarm, myreward) = if (math.random < eps) {
      // explore
      val a = epsilongreedy.randomArm
      val r = epsilongreedy.reward(a)
      (a, r)
    } else {
      // exploit
      val a = bestArm
      val r = epsilongreedy.reward(a)
      (a, r)
    }
    updateBest(myarm, myreward) // THIS IS THE CRUX OF THE ALGO
    myreward
  }
}
/*
$ scala epsilongreedy
Run 10,000 trials with Arm Probabilities (eg. 0.3,0.7): 0.3,0.7

Max     Random  1%      10%     50%     90%     99%
7063    5017    7030    6692    6005    5116    5080
*/
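Not part of the gist, but a quick sanity check on those totals: once the exploit branch has locked onto the 0.7 arm, the expected total reward over 10,000 trials is roughly trials * (eps * mean(armprob) + (1 - eps) * max(armprob)). A minimal sketch of that arithmetic (the object name expectedreward is purely illustrative):

object expectedreward extends App {
  // Same arm probabilities and trial count as the sample run above
  val armprob = List(0.3, 0.7)
  val trials = 10000
  val meanArm = armprob.sum / armprob.size // expected payoff of a uniformly random pull
  val bestArm = armprob.max                // expected payoff of the best arm
  List(0.01, 0.1, 0.5, 0.9, 0.99).foreach { eps =>
    // explore with probability eps (random arm), exploit otherwise (best arm)
    val expected = trials * (eps * meanArm + (1.0 - eps) * bestArm)
    println(f"eps=$eps%.2f expected ~ $expected%.0f")
  }
  // prints roughly 6980, 6800, 6000, 5200, 5020; the observed
  // 7030, 6692, 6005, 5116, 5080 are close, modulo noise and the
  // burn-in before the running estimates converge
}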