// optimization package interfaces
trait Func0[V, P] {
  def value(p: P): V
  def dimension: Int
}

trait Func1[V, G, P] extends Func0[V, P] {
  def grad(p: P): G = compute(p)._2
  def compute(p: P): (V, G)
}

trait Minimizer0[V, P] { def minimize(f: Func0[V, P]): P }
trait Minimizer1[V, G, P] { def minimize(f: Func1[V, G, P]): P }
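
// usage sketch: a hypothetical quadratic objective (not part of the original
// interfaces), f(p) = sum_i (p_i - 1)^2, whose gradient is g_i = 2 * (p_i - 1)
class Quadratic(val dimension: Int) extends Func1[Double, Array[Double], Array[Double]] {
  def value(p: Array[Double]): Double = compute(p)._1
  def compute(p: Array[Double]): (Double, Array[Double]) = {
    val v = p.map(pi => (pi - 1) * (pi - 1)).sum
    val g = p.map(pi => 2 * (pi - 1))
    (v, g)
  }
}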
class SGD(val learningRate: Double = 0.1d, val tol: Double = 1e-6, val doLineSearch: Boolean = false)
    extends Minimizer1[Double, Array[Double], Array[Double]] {
  lazy val ls = new LineSearch
  override def minimize(f: Func1[Double, Array[Double], Array[Double]]): Array[Double] = {
    var p = Array.ofDim[Double](f.dimension)
    var progress = Double.MaxValue
    var vPrev = Double.MaxValue
    while (progress > tol) {
      val (v, g) = f.compute(p)
      if (doLineSearch)
        p = ls.search(f, p, g.map(-_)) // search along the descent direction, i.e. the negative gradient
      else
        p = p.zip(g).map { case (pi, gi) => pi - learningRate * gi } // p -= learningRate * g
      progress = vPrev - v
      vPrev = v
    }
    p
  }
}
class LineSearch {
  // backtracking search: geometrically shrink the step along `direction`
  // and return the best point seen
  def search(f: Func0[Double, Array[Double]], starting: Array[Double], direction: Array[Double]): Array[Double] = {
    var p = starting.clone
    var step = 1d
    var best = (Double.MaxValue, p)
    while (step > 1e-6) {
      val v = f.value(p)
      if (v < best._1)
        best = (v, p.clone)
      step *= 0.3d
      p = starting.zip(direction).map { case (s, d) => s + step * d } // p = starting + step * direction
    }
    best._2
  }
}
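
// putting the pieces together (usage sketch; Quadratic is the hypothetical
// objective defined above):
object SGDExample extends App {
  val f = new Quadratic(10)
  val argmin = new SGD(learningRate = 0.1).minimize(f)
  println(argmin.mkString(", ")) // each coordinate should approach 1
}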
// this trait describes a point representation that hides ancillary data;
// the ancillary data is attached with promote and stripped with demote
// (the type parameters must be invariant because each appears in both
// argument and result position)
trait HidesState[LessInfo, MoreInfo] {
  def promote(x: LessInfo): MoreInfo
  def demote(x: MoreInfo): LessInfo
}
// identity instance for when you don't need conversions (e.g. no batches)
implicit def nothingToHide[T]: HidesState[T, T] = new HidesState[T, T] {
  def promote(t: T): T = t
  def demote(t: T): T = t
}
trait Minimizer1Scala[V, G, P, R] {
  def minimize(f: Func1[V, G, P])(implicit buildPoint: HidesState[R, P]): R
}
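
// hypothetical point representation (not defined in the original gist):
// a raw point plus the minibatch of example indices to evaluate it on
case class PointWithBatch(point: Array[Double], batch: Seq[Int])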
class SGDScala extends Minimizer1Scala[Double, Array[Double], PointWithBatch, Array[Double]] {
  override def minimize(f: Func1[Double, Array[Double], PointWithBatch])
                       (implicit batch: HidesState[Array[Double], PointWithBatch]): Array[Double] = {
    // in this example, batch stores the state in PointWithBatch
    // that isn't just the Array[Double].
    // we can build the Array[Double] just by knowing the dimension of the function
    val p: Array[Double] = Array.ofDim[Double](f.dimension)
    // the function needs to know the batch, so we let batch annotate
    // the Array[Double] that we made
    val pWithBatch: PointWithBatch = batch.promote(p)
    val (v, g) = f.compute(pWithBatch)
    // this allows us to resolve the seemingly conflicting requirements:
    // 1) the function needs to take a batch
    // 2) this class needs to instantiate new points as Array[Double]
    // when we finally find a PointWithBatch we like, we demote it and return
    batch.demote(pWithBatch)
    // ok, i believe we have officially hit over-engineering
  }
}
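
// usage sketch for the HidesState plumbing (hypothetical batch contents):
object BatchExample {
  implicit val attachBatch: HidesState[Array[Double], PointWithBatch] =
    new HidesState[Array[Double], PointWithBatch] {
      def promote(p: Array[Double]): PointWithBatch = PointWithBatch(p, Seq(0, 1, 2))
      def demote(pb: PointWithBatch): Array[Double] = pb.point
    }
  // with the implicit in scope, SGDScala can run on a batched objective
  def run(f: Func1[Double, Array[Double], PointWithBatch]): Array[Double] =
    new SGDScala().minimize(f)
}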
// decide on one of these; the choice is mostly orthogonal to this code
// (the sketches below assume the tuple representation)
// type Solution = Vector
type Solution = (Vector, Double) // solution + f(solution)
trait Optimizer {
  def optimize(problem: OptimizationProblem): Solution
}
/**
 * convenience methods for creating OptimizationProblems
 */
object OptimizationProblem {
  def maximize(f: Func): OptimizationProblem[Func] = ???
  def minimize(f: Func): OptimizationProblem[Func] = ???
}
trait OptimizationProblem[F <: Func] {
  def objective: F
  def constraints: Seq[Constraint]
  // choose one of the following two;
  // i'm biased towards compare because it's more general
  def compare(solA: Solution, solB: Solution): Double
  def shouldMaximize: Boolean
  def timeLimit: Option[Time]
  def memLimit: Option[Memory]
  def cpuLimit: Option[Int]
  // etc.
}
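
// a minimal sketch of a concrete problem (hypothetical; Func and Constraint
// are assumed to be defined elsewhere in the package). with the tuple
// representation of Solution, compare can order solutions by objective value:
class UnconstrainedMinimization[F <: Func](val objective: F) extends OptimizationProblem[F] {
  def constraints: Seq[Constraint] = Seq.empty
  // positive when solA is better; for minimization, smaller f(solution) wins
  def compare(solA: Solution, solB: Solution): Double = solB._2 - solA._2
  def shouldMaximize: Boolean = false
  def timeLimit: Option[Time] = None
  def memLimit: Option[Memory] = None
  def cpuLimit: Option[Int] = None
}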
/**
 * for a new Optimizer that you implement,
 * if you want to tie into the testing code,
 * you need to implement one of these
 */
trait OptimizerFactory {
  /**
   * can inspect the problem to set parameters,
   * e.g. a short time limit might mean you choose some Optimizer settings differently
   */
  def defaultOptimizer(problem: OptimizationProblem): Optimizer
  /**
   * return a grid of parameter settings that you think will work for this problem
   */
  def gridOfOptimizers(problem: OptimizationProblem, maxOptimizers: Int): Seq[Optimizer]
}
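
// a sketch of a factory for the SGD optimizer above. SGDOptimizer is a
// hypothetical adapter (the gist leaves the bridge from Minimizer1 to
// Optimizer open, hence the ??? stub):
case class SGDOptimizer(learningRate: Double) extends Optimizer {
  def optimize(problem: OptimizationProblem): Solution = ??? // wrap SGD.minimize here
}

object SGDFactory extends OptimizerFactory {
  def defaultOptimizer(problem: OptimizationProblem): Optimizer = SGDOptimizer(0.1)
  def gridOfOptimizers(problem: OptimizationProblem, maxOptimizers: Int): Seq[Optimizer] = {
    // geometric grid over the learning rate, capped at maxOptimizers settings
    Iterator.iterate(1d)(_ / 3d).take(maxOptimizers).map(lr => SGDOptimizer(lr)).toSeq
  }
}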
/**
 * TODO: add logging to show which settings are doing well.
 * this class might dispatch optimization jobs to different grid machines!
 * this class might also be a nice home for GP code
 * (in which case we might rename it to something like MetaOptimizer).
 * actually, it's probably better to describe the GP stuff as an Optimizer
 * itself (over hyper-params) and keep the semantics of this class purely
 * for calling Optimizers (i.e. necessarily the "top" of the stack)
 */
trait Sweeper {
  def sweep(problem: OptimizationProblem, optFactory: OptimizerFactory, n: Int = 10): (Solution, Optimizer) =
    sweep(problem, optFactory.gridOfOptimizers(problem, n))
  def sweep(problem: OptimizationProblem, optimizers: Seq[Optimizer]): (Solution, Optimizer) = {
    var best: (Solution, Optimizer) = null
    for (opt <- optimizers) {
      val sol = opt.optimize(problem)
      if (best == null || problem.compare(sol, best._1) > 0d)
        best = (sol, opt)
    }
    best
  }
}
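
// end-to-end usage sketch (hypothetical; uses the SGDFactory sketch above):
object SweepExample {
  val sweeper = new Sweeper {} // Sweeper has no abstract members
  def run(problem: OptimizationProblem): Unit = {
    val (bestSol, bestOpt) = sweeper.sweep(problem, SGDFactory, n = 5)
    println(s"best f(solution) = ${bestSol._2}, found by $bestOpt")
  }
}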