krishnanraman/epsilongreedy.scala

## epsilongreedy.scala
object epsilongreedy extends App {
	// cmd-line inits
	print("Run 10,000 trials with Arm Probabilities (eg. 0.3,0.7): ")
	val s = readLine
	val armprob:List[Double] = s.split(",").toList.map{ x => x.toDouble }
	val arms = armprob.size
	val trials = 10000
	val epsAll = List(0.01,0.1,0.5,0.9,0.99).map{ eps => Eps(eps, arms)}
	val bestArm:Int = armprob.zipWithIndex.maxBy{ x=> x._1}._2

	def reward(arm:Int):Int = if (math.random < armprob(arm)) 1 else 0
	def randomArm:Int = (math.random*arms).toInt
	def randomReward:Int = reward(randomArm)
	def maxReward:Int = reward(bestArm)

	println("\n\nMax\tRandom\t1%\t10%\t50%\t90%\t99%")
	val pulls:List[List[Int]] = (1 to trials).toList.map { t =>
		maxReward :: randomReward :: epsAll.map{ x=> x.reward }
	}
	(0 until pulls.head.size).foreach{ column =>
		printf("%d\t", pulls.map{ row => row(column)}.sum)
	}
	println
}

case class Eps(eps:Double, arms:Int) {

	// initialize reward & arm by 1 instead of 0, to avoid a 0/0 problem when finding bestArm
	val egRewards = Array.fill[Int](arms)(1) // reward yielded by each arm so far
	val egArms = Array.fill[Int](arms)(1) // the number of times each arm was pulled so far


	var bestArm:Int =  epsilongreedy.randomArm	// starting value of the best is random

	def updateBest(arm:Int, reward:Int):Unit = {
		egArms(arm) = egArms(arm) + 1 // update the arm pull
		egRewards(arm) = egRewards(arm) + reward // update the reward for the arm

		bestArm = egRewards.zip(egArms) // marry reward to arm pull
		.map{ x=> x._1.toDouble/x._2 } // divide reward by number of times the arm was pulled, no 0/0 issues
		.zipWithIndex  // marry reward to its index
		.maxBy{ x=> x._1 }  // max reward
		._2  // index of max reward
	}

	// epsilon-greedy algorithm
	def reward:Int = {
		val (myarm,myreward) = if (math.random < eps) {
			// explore
			val a = epsilongreedy.randomArm
			val r = epsilongreedy.reward(a)
			(a,r)
		} else {
			// exploit
			val a = bestArm
			val r = epsilongreedy.reward(a)
			(a,r)
		}
		updateBest(myarm,myreward) // THIS IS THE CRUX OF THE ALGO
		myreward
	}

}


/*
$ scala epsilongreedy
Run 10,000 trials with Arm Probabilities (eg. 0.3,0.7): 0.3,0.7


Max		Random	1%		10%		50%		90%		99%
7063	5017	7030	6692	6005	5116	5080
*/
	object epsilongreedy extends App {
	// cmd-line inits
	print("Run 10,000 trials with Arm Probabilities (eg. 0.3,0.7): ")
	val s = readLine
	val armprob:List[Double] = s.split(",").toList.map{ x => x.toDouble }
	val arms = armprob.size
	val trials = 10000
	val epsAll = List(0.01,0.1,0.5,0.9,0.99).map{ eps => Eps(eps, arms)}
	val bestArm:Int = armprob.zipWithIndex.maxBy{ x=> x._1}._2

	def reward(arm:Int):Int = if (math.random < armprob(arm)) 1 else 0
	def randomArm:Int = (math.random*arms).toInt
	def randomReward:Int = reward(randomArm)
	def maxReward:Int = reward(bestArm)

	println("\n\nMax\tRandom\t1%\t10%\t50%\t90%\t99%")
	val pulls:List[List[Int]] = (1 to trials).toList.map { t =>
	maxReward :: randomReward :: epsAll.map{ x=> x.reward }
	}
	(0 until pulls.head.size).foreach{ column =>
	printf("%d\t", pulls.map{ row => row(column)}.sum)
	}
	println
	}

	case class Eps(eps:Double, arms:Int) {

	// initialize reward & arm by 1 instead of 0, to avoid a 0/0 problem when finding bestArm
	val egRewards = Array.fill[Int](arms)(1) // reward yielded by each arm so far
	val egArms = Array.fill[Int](arms)(1) // the number of times each arm was pulled so far


	var bestArm:Int = epsilongreedy.randomArm // starting value of the best is random

	def updateBest(arm:Int, reward:Int):Unit = {
	egArms(arm) = egArms(arm) + 1 // update the arm pull
	egRewards(arm) = egRewards(arm) + reward // update the reward for the arm

	bestArm = egRewards.zip(egArms) // marry reward to arm pull
	.map{ x=> x._1.toDouble/x._2 } // divide reward by number of times the arm was pulled, no 0/0 issues
	.zipWithIndex // marry reward to its index
	.maxBy{ x=> x._1 } // max reward
	._2 // index of max reward
	}

	// epsilon-greedy algorithm
	def reward:Int = {
	val (myarm,myreward) = if (math.random < eps) {
	// explore
	val a = epsilongreedy.randomArm
	val r = epsilongreedy.reward(a)
	(a,r)
	} else {
	// exploit
	val a = bestArm
	val r = epsilongreedy.reward(a)
	(a,r)
	}
	updateBest(myarm,myreward) // THIS IS THE CRUX OF THE ALGO
	myreward
	}

	}


	/*
	$ scala epsilongreedy
	Run 10,000 trials with Arm Probabilities (eg. 0.3,0.7): 0.3,0.7


	Max Random 1% 10% 50% 90% 99%
	7063 5017 7030 6692 6005 5116 5080
	*/