Created
August 23, 2017 23:35
-
-
Save krishnanraman/c101f98b439ba05ecfc666c57679caea to your computer and use it in GitHub Desktop.
Using the epsilon greedy algorithm to figure out the true optimal bid
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
object variableBid extends App { | |
printf("Min Bid:") | |
val min = readDouble | |
printf("Max Bid:") | |
val max = readDouble | |
printf("Number of arms:") | |
val arms = readInt | |
printf("Trials (say 1000):") | |
val trials = readInt | |
printf("Epsilon (say 0.01):") | |
val epsilon = readDouble | |
// associate a bid with each arm | |
val incr = (max-min).toDouble/(arms-1) | |
val bids = (min to max by incr).toList // arm x has bid equal to bids(x) | |
val eps = Eps(epsilon, arms) | |
def randomArm:Int = (math.random*arms).toInt | |
// This is my fake static database. The real databases reside on AWS, they will get updated over time etc. | |
val bidDB = List.tabulate[Int](bids.size){ x=> (math.random * 1000).toInt } // how many times we bid a particular amount | |
val clickDB = List.tabulate[Int](bids.size){ x=> (math.random * bidDB(x)).toInt } // how many clicks we got | |
val allCTR = clickDB.zip(bidDB).map{ x=> x._1.toDouble/x._2} | |
val bestArm:Int = allCTR.zipWithIndex.maxBy{ x=> x._1}._2 | |
val bestCTR:Double = allCTR.max | |
def reward(arm:Int):Int = { | |
// grab the bid for this arm | |
val myBid = bids(arm) | |
// hit the DB and find out two things | |
// 1. N = how many times we bid this amount | |
// 2. C = how many clicks we got in total | |
val N = bidDB(arm) // THE ACTUAL NUMBER OF TIMES WE BID myBid GOES HERE | |
val C = clickDB(arm) // THE ACTUAL NUMBER OF CLICKS WE GOT for myBid GOES HERE | |
val ctr = C.toDouble/N | |
if (math.random < ctr) 1 else 0 | |
} | |
val pulls = List.tabulate[Int](trials){ x=> eps.reward } | |
printf("Our wins in %d trials with %.2f epsilon: %d\n", trials, epsilon , pulls.sum) | |
printf("Max wins possible: %d\n", (bestCTR * trials).toInt) | |
printf("The best arm, as per our epsilon greedy algo: %d\n", eps.bestArm) | |
printf("The best CTR, as per our epsilon greedy algo: %.2f\n", allCTR(eps.bestArm)) | |
printf("The TRUE best arm: %d\n", bestArm) | |
printf("The TRUE best CTR: %.2f\n", bestCTR) | |
println("Bid DB: " + bidDB) | |
println("Click DB: " + clickDB) | |
} | |
case class Eps(eps:Double, arms:Int) { | |
// initiaklize reward & arm by 1 instead of 0, to avoid a 0/0 problem when finding bestArm | |
val egRewards = Array.fill[Int](arms)(1) // reward yielded by each arm so far | |
val egArms = Array.fill[Int](arms)(1) // the number of times each arm was pulled so far | |
var bestArm:Int = variableBid.randomArm // starting value of the best is random | |
def updateBest(arm:Int, reward:Int):Unit = { | |
egArms(arm) = egArms(arm) + 1 // update the arm pull | |
egRewards(arm) = egRewards(arm) + reward // update the reward for the arm | |
bestArm = egRewards.zip(egArms) // marry reward to arm pull | |
.map{ x=> x._1.toDouble/x._2 } // divide reward by number of times the arm was pulled, no 0/0 issues | |
.zipWithIndex // marry reward to its index | |
.maxBy{ x=> x._1 } // max reward | |
._2 // index of max reward | |
} | |
// epsilon-greedy algorithm | |
def reward:Int = { | |
val (myarm,myreward) = if (math.random < eps) { | |
// explore | |
val a = variableBid.randomArm | |
val r = variableBid.reward(a) | |
(a,r) | |
} else { | |
// exploit | |
val a = bestArm | |
val r = variableBid.reward(a) | |
(a,r) | |
} | |
updateBest(myarm,myreward) // THIS IS THE CRUX OF THE ALGO | |
myreward | |
} | |
} | |
/* USAGE: TWO EXAMPLES | |
$ scala variableBid | |
Min Bid:0.1 | |
Max Bid:0.5 | |
Number of arms:7 | |
Trials (say 1000):1000 | |
Epsilon (say 0.01):0.01 | |
Our wins in 1000 trials with 0.01 epsilon: 934 | |
Max wins possible: 949 | |
The best arm, as per our epsilon greedy algo: 4 | |
The best CTR, as per our epsilon greedy algo: 0.93 | |
The TRUE best arm: 0 | |
The TRUE best CTR: 0.95 | |
Bid DB: List(536, 715, 768, 549, 595, 581, 793) | |
Click DB: List(509, 162, 653, 225, 555, 58, 458) | |
$ scala variableBid | |
Min Bid:0.1 | |
Max Bid:0.5 | |
Number of arms:7 | |
Trials (say 1000):10000 | |
Epsilon (say 0.01):0.01 | |
Our wins in 10000 trials with 0.01 epsilon: 9355 | |
Max wins possible: 9414 | |
The best arm, as per our epsilon greedy algo: 1 | |
The best CTR, as per our epsilon greedy algo: 0.94 | |
The TRUE best arm: 1 | |
The TRUE best CTR: 0.94 | |
Bid DB: List(152, 820, 673, 996, 42, 736, 999) | |
Click DB: List(39, 772, 275, 606, 21, 263, 629) | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment