Skip to content

Instantly share code, notes, and snippets.

@krishnanraman
Created August 23, 2017 23:35
Show Gist options
  • Save krishnanraman/c101f98b439ba05ecfc666c57679caea to your computer and use it in GitHub Desktop.
Save krishnanraman/c101f98b439ba05ecfc666c57679caea to your computer and use it in GitHub Desktop.
Using the epsilon greedy algorithm to figure out the true optimal bid
object variableBid extends App {
printf("Min Bid:")
val min = readDouble
printf("Max Bid:")
val max = readDouble
printf("Number of arms:")
val arms = readInt
printf("Trials (say 1000):")
val trials = readInt
printf("Epsilon (say 0.01):")
val epsilon = readDouble
// associate a bid with each arm
val incr = (max-min).toDouble/(arms-1)
val bids = (min to max by incr).toList // arm x has bid equal to bids(x)
val eps = Eps(epsilon, arms)
def randomArm:Int = (math.random*arms).toInt
// This is my fake static database. The real databases reside on AWS, they will get updated over time etc.
val bidDB = List.tabulate[Int](bids.size){ x=> (math.random * 1000).toInt } // how many times we bid a particular amount
val clickDB = List.tabulate[Int](bids.size){ x=> (math.random * bidDB(x)).toInt } // how many clicks we got
val allCTR = clickDB.zip(bidDB).map{ x=> x._1.toDouble/x._2}
val bestArm:Int = allCTR.zipWithIndex.maxBy{ x=> x._1}._2
val bestCTR:Double = allCTR.max
def reward(arm:Int):Int = {
// grab the bid for this arm
val myBid = bids(arm)
// hit the DB and find out two things
// 1. N = how many times we bid this amount
// 2. C = how many clicks we got in total
val N = bidDB(arm) // THE ACTUAL NUMBER OF TIMES WE BID myBid GOES HERE
val C = clickDB(arm) // THE ACTUAL NUMBER OF CLICKS WE GOT for myBid GOES HERE
val ctr = C.toDouble/N
if (math.random < ctr) 1 else 0
}
val pulls = List.tabulate[Int](trials){ x=> eps.reward }
printf("Our wins in %d trials with %.2f epsilon: %d\n", trials, epsilon , pulls.sum)
printf("Max wins possible: %d\n", (bestCTR * trials).toInt)
printf("The best arm, as per our epsilon greedy algo: %d\n", eps.bestArm)
printf("The best CTR, as per our epsilon greedy algo: %.2f\n", allCTR(eps.bestArm))
printf("The TRUE best arm: %d\n", bestArm)
printf("The TRUE best CTR: %.2f\n", bestCTR)
println("Bid DB: " + bidDB)
println("Click DB: " + clickDB)
}
case class Eps(eps:Double, arms:Int) {
// initiaklize reward & arm by 1 instead of 0, to avoid a 0/0 problem when finding bestArm
val egRewards = Array.fill[Int](arms)(1) // reward yielded by each arm so far
val egArms = Array.fill[Int](arms)(1) // the number of times each arm was pulled so far
var bestArm:Int = variableBid.randomArm // starting value of the best is random
def updateBest(arm:Int, reward:Int):Unit = {
egArms(arm) = egArms(arm) + 1 // update the arm pull
egRewards(arm) = egRewards(arm) + reward // update the reward for the arm
bestArm = egRewards.zip(egArms) // marry reward to arm pull
.map{ x=> x._1.toDouble/x._2 } // divide reward by number of times the arm was pulled, no 0/0 issues
.zipWithIndex // marry reward to its index
.maxBy{ x=> x._1 } // max reward
._2 // index of max reward
}
// epsilon-greedy algorithm
def reward:Int = {
val (myarm,myreward) = if (math.random < eps) {
// explore
val a = variableBid.randomArm
val r = variableBid.reward(a)
(a,r)
} else {
// exploit
val a = bestArm
val r = variableBid.reward(a)
(a,r)
}
updateBest(myarm,myreward) // THIS IS THE CRUX OF THE ALGO
myreward
}
}
/* USAGE: TWO EXAMPLES
$ scala variableBid
Min Bid:0.1
Max Bid:0.5
Number of arms:7
Trials (say 1000):1000
Epsilon (say 0.01):0.01
Our wins in 1000 trials with 0.01 epsilon: 934
Max wins possible: 949
The best arm, as per our epsilon greedy algo: 4
The best CTR, as per our epsilon greedy algo: 0.93
The TRUE best arm: 0
The TRUE best CTR: 0.95
Bid DB: List(536, 715, 768, 549, 595, 581, 793)
Click DB: List(509, 162, 653, 225, 555, 58, 458)
$ scala variableBid
Min Bid:0.1
Max Bid:0.5
Number of arms:7
Trials (say 1000):10000
Epsilon (say 0.01):0.01
Our wins in 10000 trials with 0.01 epsilon: 9355
Max wins possible: 9414
The best arm, as per our epsilon greedy algo: 1
The best CTR, as per our epsilon greedy algo: 0.94
The TRUE best arm: 1
The TRUE best CTR: 0.94
Bid DB: List(152, 820, 673, 996, 42, 736, 999)
Click DB: List(39, 772, 275, 606, 21, 263, 629)
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment