krishnanraman/variableBid.scala

## variableBid.scala
object variableBid extends App {
	printf("Min Bid:")
	val min = readDouble
	printf("Max Bid:")
	val max = readDouble
	printf("Number of arms:")
	val arms = readInt
	printf("Trials (say 1000):")
	val trials = readInt
	printf("Epsilon (say 0.01):")
	val epsilon = readDouble


	// associate a bid with each arm
	val incr = (max-min).toDouble/(arms-1)
	val bids = (min to max by incr).toList  // arm x has bid equal to bids(x)

	val eps = Eps(epsilon, arms)

	def randomArm:Int = (math.random*arms).toInt

	// This is my fake static database. The real databases reside on AWS, they will get updated over time etc.
	val bidDB = List.tabulate[Int](bids.size){ x=> (math.random * 1000).toInt } // how many times we bid a particular amount
	val clickDB = List.tabulate[Int](bids.size){ x=> (math.random * bidDB(x)).toInt } // how many clicks we got

	val allCTR = clickDB.zip(bidDB).map{ x=> x._1.toDouble/x._2}
	val bestArm:Int = allCTR.zipWithIndex.maxBy{ x=> x._1}._2
	val bestCTR:Double = allCTR.max

	def reward(arm:Int):Int = {
		// grab the bid for this arm
		val myBid = bids(arm)

		// hit the DB and find out two things
		// 1. N = how many times we bid this amount
		// 2. C = how many clicks we got in total
		val N = bidDB(arm) // THE ACTUAL NUMBER OF TIMES WE BID myBid GOES HERE
		val C = clickDB(arm) // THE ACTUAL NUMBER OF CLICKS WE GOT for myBid GOES HERE
		val ctr = C.toDouble/N

		if (math.random < ctr) 1 else 0
	}

	val pulls = List.tabulate[Int](trials){ x=> eps.reward }
	printf("Our wins in %d trials with %.2f epsilon: %d\n", trials, epsilon , pulls.sum)
	printf("Max wins possible: %d\n", (bestCTR * trials).toInt)
	printf("The best arm, as per our epsilon greedy algo: %d\n", eps.bestArm)
	printf("The best CTR, as per our epsilon greedy algo: %.2f\n", allCTR(eps.bestArm))
	printf("The TRUE best arm: %d\n", bestArm)
	printf("The TRUE best CTR: %.2f\n", bestCTR)

	println("Bid DB: " + bidDB)
	println("Click DB: " + clickDB)

}

case class Eps(eps:Double, arms:Int) {

	// initiaklize reward & arm by 1 instead of 0, to avoid a 0/0 problem when finding bestArm
	val egRewards = Array.fill[Int](arms)(1) // reward yielded by each arm so far
	val egArms = Array.fill[Int](arms)(1) // the number of times each arm was pulled so far


	var bestArm:Int =  variableBid.randomArm	// starting value of the best is random

	def updateBest(arm:Int, reward:Int):Unit = {
		egArms(arm) = egArms(arm) + 1 // update the arm pull
		egRewards(arm) = egRewards(arm) + reward // update the reward for the arm

		bestArm = egRewards.zip(egArms) // marry reward to arm pull
		.map{ x=> x._1.toDouble/x._2 } // divide reward by number of times the arm was pulled, no 0/0 issues
		.zipWithIndex  // marry reward to its index
		.maxBy{ x=> x._1 }  // max reward
		._2  // index of max reward
	}

	// epsilon-greedy algorithm
	def reward:Int = {
		val (myarm,myreward) = if (math.random < eps) {
			// explore
			val a = variableBid.randomArm
			val r = variableBid.reward(a)
			(a,r)
		} else {
			// exploit
			val a = bestArm
			val r = variableBid.reward(a)
			(a,r)
		}
		updateBest(myarm,myreward) // THIS IS THE CRUX OF THE ALGO
		myreward
	}

}

/* USAGE: TWO EXAMPLES

$ scala variableBid
Min Bid:0.1
Max Bid:0.5
Number of arms:7
Trials (say 1000):1000
Epsilon (say 0.01):0.01
Our wins in 1000 trials with 0.01 epsilon: 934
Max wins possible: 949
The best arm, as per our epsilon greedy algo: 4
The best CTR, as per our epsilon greedy algo: 0.93
The TRUE best arm: 0
The TRUE best CTR: 0.95
Bid DB: List(536, 715, 768, 549, 595, 581, 793)
Click DB: List(509, 162, 653, 225, 555, 58, 458)


$ scala variableBid
Min Bid:0.1
Max Bid:0.5
Number of arms:7
Trials (say 1000):10000
Epsilon (say 0.01):0.01
Our wins in 10000 trials with 0.01 epsilon: 9355
Max wins possible: 9414
The best arm, as per our epsilon greedy algo: 1
The best CTR, as per our epsilon greedy algo: 0.94
The TRUE best arm: 1
The TRUE best CTR: 0.94
Bid DB: List(152, 820, 673, 996, 42, 736, 999)
Click DB: List(39, 772, 275, 606, 21, 263, 629)
*/
	object variableBid extends App {
	printf("Min Bid:")
	val min = readDouble
	printf("Max Bid:")
	val max = readDouble
	printf("Number of arms:")
	val arms = readInt
	printf("Trials (say 1000):")
	val trials = readInt
	printf("Epsilon (say 0.01):")
	val epsilon = readDouble


	// associate a bid with each arm
	val incr = (max-min).toDouble/(arms-1)
	val bids = (min to max by incr).toList // arm x has bid equal to bids(x)

	val eps = Eps(epsilon, arms)

	def randomArm:Int = (math.random*arms).toInt

	// This is my fake static database. The real databases reside on AWS, they will get updated over time etc.
	val bidDB = List.tabulate[Int](bids.size){ x=> (math.random * 1000).toInt } // how many times we bid a particular amount
	val clickDB = List.tabulate[Int](bids.size){ x=> (math.random * bidDB(x)).toInt } // how many clicks we got

	val allCTR = clickDB.zip(bidDB).map{ x=> x._1.toDouble/x._2}
	val bestArm:Int = allCTR.zipWithIndex.maxBy{ x=> x._1}._2
	val bestCTR:Double = allCTR.max

	def reward(arm:Int):Int = {
	// grab the bid for this arm
	val myBid = bids(arm)

	// hit the DB and find out two things
	// 1. N = how many times we bid this amount
	// 2. C = how many clicks we got in total
	val N = bidDB(arm) // THE ACTUAL NUMBER OF TIMES WE BID myBid GOES HERE
	val C = clickDB(arm) // THE ACTUAL NUMBER OF CLICKS WE GOT for myBid GOES HERE
	val ctr = C.toDouble/N

	if (math.random < ctr) 1 else 0
	}

	val pulls = List.tabulate[Int](trials){ x=> eps.reward }
	printf("Our wins in %d trials with %.2f epsilon: %d\n", trials, epsilon , pulls.sum)
	printf("Max wins possible: %d\n", (bestCTR * trials).toInt)
	printf("The best arm, as per our epsilon greedy algo: %d\n", eps.bestArm)
	printf("The best CTR, as per our epsilon greedy algo: %.2f\n", allCTR(eps.bestArm))
	printf("The TRUE best arm: %d\n", bestArm)
	printf("The TRUE best CTR: %.2f\n", bestCTR)

	println("Bid DB: " + bidDB)
	println("Click DB: " + clickDB)

	}

	case class Eps(eps:Double, arms:Int) {

	// initiaklize reward & arm by 1 instead of 0, to avoid a 0/0 problem when finding bestArm
	val egRewards = Array.fill[Int](arms)(1) // reward yielded by each arm so far
	val egArms = Array.fill[Int](arms)(1) // the number of times each arm was pulled so far


	var bestArm:Int = variableBid.randomArm // starting value of the best is random

	def updateBest(arm:Int, reward:Int):Unit = {
	egArms(arm) = egArms(arm) + 1 // update the arm pull
	egRewards(arm) = egRewards(arm) + reward // update the reward for the arm

	bestArm = egRewards.zip(egArms) // marry reward to arm pull
	.map{ x=> x._1.toDouble/x._2 } // divide reward by number of times the arm was pulled, no 0/0 issues
	.zipWithIndex // marry reward to its index
	.maxBy{ x=> x._1 } // max reward
	._2 // index of max reward
	}

	// epsilon-greedy algorithm
	def reward:Int = {
	val (myarm,myreward) = if (math.random < eps) {
	// explore
	val a = variableBid.randomArm
	val r = variableBid.reward(a)
	(a,r)
	} else {
	// exploit
	val a = bestArm
	val r = variableBid.reward(a)
	(a,r)
	}
	updateBest(myarm,myreward) // THIS IS THE CRUX OF THE ALGO
	myreward
	}

	}

	/* USAGE: TWO EXAMPLES

	$ scala variableBid
	Min Bid:0.1
	Max Bid:0.5
	Number of arms:7
	Trials (say 1000):1000
	Epsilon (say 0.01):0.01
	Our wins in 1000 trials with 0.01 epsilon: 934
	Max wins possible: 949
	The best arm, as per our epsilon greedy algo: 4
	The best CTR, as per our epsilon greedy algo: 0.93
	The TRUE best arm: 0
	The TRUE best CTR: 0.95
	Bid DB: List(536, 715, 768, 549, 595, 581, 793)
	Click DB: List(509, 162, 653, 225, 555, 58, 458)


	$ scala variableBid
	Min Bid:0.1
	Max Bid:0.5
	Number of arms:7
	Trials (say 1000):10000
	Epsilon (say 0.01):0.01
	Our wins in 10000 trials with 0.01 epsilon: 9355
	Max wins possible: 9414
	The best arm, as per our epsilon greedy algo: 1
	The best CTR, as per our epsilon greedy algo: 0.94
	The TRUE best arm: 1
	The TRUE best CTR: 0.94
	Bid DB: List(152, 820, 673, 996, 42, 736, 999)
	Click DB: List(39, 772, 275, 606, 21, 263, 629)
	*/