Spark Machine Learning API Design Notes
// An Example is an observation with optional target value and features in the form of a vector of Doubles
case class Example(target: Option[Double] = None, features: Vector[Double])
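// Usage sketch for Example (an assumption here: Vector / DenseVector are Breeze types,
// which the ALS mapping below implies). Since target comes first with a default value,
// an unlabelled observation is constructed with a named `features` argument:
val labelled   = Example(Some(4.0), DenseVector(1.0, 2.0, 3.0))
val unlabelled = Example(features = DenseVector(1.0, 2.0, 3.0))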
// Base model API looks something like:
abstract class BaseModel(val modelSettings: Settings)
  extends Serializable
  with Logging {

  def fit(data: RDD[Example])
  def fit[U](data: RDD[U])(implicit dataMapping: U => Example) { fit(data.map(dataMapping)) }
  def predict(data: RDD[Example])
}
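// Usage sketch for the generic fit[U] overload (LabeledHouse, houses and model are
// hypothetical names; only the implicit U => Example mechanism comes from the API above).
// Any RDD whose element type has an implicit conversion to Example can be fit directly:
case class LabeledHouse(price: Double, sqft: Double, rooms: Double)
implicit val houseToExample: LabeledHouse => Example =
  h => Example(Some(h.price), DenseVector(h.sqft, h.rooms))
// model.fit(houses)   // houses: RDD[LabeledHouse] resolves to fit[LabeledHouse] via the implicit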
// Models are free to implement their own additional fit methods, e.g. ALS does
def fit(data: RDD[(Int, Int, Double)])(implicit evidence: Manifest[RDD[(Int, Int, Double)]]) {
  fit(data.map { case (userId, itemId, score) =>
    Example(Option(score), DenseVector(userId.toDouble - 1, itemId.toDouble - 1))
  })
}
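// Usage sketch for the ALS-specific overload (sc and als are assumed to exist; the
// Manifest evidence parameter above is presumably there only to keep this fit's
// signature distinct from the other fit methods after type erasure):
val triples: RDD[(Int, Int, Double)] = sc.textFile("ratings.csv")
  .map(_.split(","))
  .map(a => (a(0).toInt, a(1).toInt, a(2).toDouble))
als.fit(triples)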
// Models can have a default "DataMapping" from raw data (usually text) to model inputs
object DefaultALSMapping extends DataMapping[String] {
  override def call(str: String) = {
    DataMapping.numberStringSplit(str) match {
      case Array(userId, itemId, score, _*) =>
        Example(Option(score.toDouble), DenseVector(userId.toDouble - 1, itemId.toDouble - 1))
    }
  }
}
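// Usage sketch for the default mapping (sc and als are assumed, and it's an assumption
// that DataMapping's call method can be lifted to the implicit String => Example that
// the generic fit overload expects):
implicit val stringToExample: String => Example = DefaultALSMapping.call _
als.fit(sc.textFile("ratings.txt"))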