Hao Ren (invkrh)

@invkrh
invkrh / gist:05a83be081c1f713e15b
Last active November 8, 2016 14:50
MLlib ALS evaluation using Mean Average Precision (MAP) and Expected Percentile Rank (EPR)
def meanAveragedPrecision(k: Int, model: MatrixFactorizationModel, ratings: RDD[Rating]): Double = {
  def avgPrecisionK(actual: Seq[Int], predicted: Seq[Int], k: Int): Double = {
    val predK = predicted.take(k)
    var score = 0.0
    var numHits = 0.0
    for ((p, i) <- predK.zipWithIndex) {
      // Count an item only at its first occurrence within the top k.
      if (actual.contains(p) && !predK.take(i).contains(p)) {
        numHits += 1.0
        score += numHits / (i.toDouble + 1.0)
      }
    }
    // Standard AP@k normalization, reconstructed from the truncated preview:
    // divide by min(|actual|, k); empty ground truth scores 0.
    if (actual.isEmpty) 0.0 else score / math.min(actual.size, k).toDouble
  }
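A quick sanity check of avgPrecisionK on literal data, computed by hand; it assumes the min(|actual|, k) normalization reconstructed above and that the helper is hoisted to top level for testing:

// actual = Seq(1, 3), predicted = Seq(1, 2, 3), k = 3
// hits at ranks 1 and 3 contribute precisions 1/1 and 2/3
// AP@3 = (1.0 + 2.0 / 3.0) / min(2, 3) = 5.0 / 6.0
assert(math.abs(avgPrecisionK(Seq(1, 3), Seq(1, 2, 3), 3) - 5.0 / 6.0) < 1e-9)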
@invkrh
invkrh / GroupByTest.scala
Last active August 29, 2015 14:06
groupBy problem
import context.RecoSysContext._
import org.apache.spark.HashPartitioner
import org.apache.spark.SparkContext._

object GroupByTest extends App {
  case class purchaseLog(client_id: String, ticket_id: String, store: String)

  // The input directory was generated by RDD.saveAsTextFile with 27 partitions.
  val input = sc.textFile("/home/spark/workspace/data/pruchaseLog")
    .map(_.split(";", -1))
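The preview stops at the split; a minimal sketch of how the grouping step might continue (the field order and the reuse of 27 partitions are assumptions, not the gist's code):

  val purchases = input.map(f => purchaseLog(f(0), f(1), f(2)))
  // Group ticket ids by client with an explicit partitioner so the
  // shuffle layout is deterministic and easy to inspect.
  val ticketsByClient = purchases
    .map(p => (p.client_id, p.ticket_id))
    .groupByKey(new HashPartitioner(27))
  ticketsByClient.take(5).foreach(println)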
@invkrh
invkrh / ndcg
Created January 26, 2015 17:01
Correction of NDCG (normalized discounted cumulative gain) computation
def ndcgAt(k: Int): Double = {
  require(k > 0, "ranking position k should be positive")
  predictionAndLabels.map { case (pred, lab) =>
    val labSet = lab.toSet
    if (labSet.nonEmpty) {
      val labSetSize = labSet.size
      val predSize = pred.size
      val n = math.min(math.max(pred.length, labSetSize), k)
      var maxDcg = 0.0
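The preview ends at maxDcg; a hedged sketch of how a binary-relevance DCG/IDCG loop typically continues (modeled on Spark's RankingMetrics.ndcgAt, so an assumption about this correction's final form, not its verbatim code):

      var dcg = 0.0
      var i = 0
      while (i < n) {
        // Discount rank i by log(i + 2); the log base cancels in dcg / maxDcg.
        val gain = 1.0 / math.log(i + 2)
        if (i < pred.length && labSet.contains(pred(i))) dcg += gain
        if (i < labSetSize) maxDcg += gain
        i += 1
      }
      dcg / maxDcg
    } else 0.0
  }.mean()
}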
@invkrh
invkrh / mockInput.scala
Last active November 24, 2017 10:15
Mocking standard input for tests
object test extends App {
  import scala.collection.JavaConversions.asJavaEnumeration
  import java.io.{ByteArrayInputStream, SequenceInputStream}

  // One in-memory stream per mocked line, concatenated in order.
  val inputs = Iterator("asdf", "qewr", "wert")
    .map(x => new ByteArrayInputStream((x + "\n").getBytes))
  val in = new SequenceInputStream(asJavaEnumeration(inputs))

  // Redirect stdin so readLine() reads the mocked lines.
  Console.setIn(in)
  val a = readLine()
  val b = readLine()
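With stdin redirected, readLine() consumes the mocked lines in order; a minimal check (an addition, not in the preview):

  assert(a == "asdf" && b == "qewr")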
@invkrh
invkrh / cc.scala
Created July 22, 2015 22:51
Connected Component Problem
package me.invkrh.train

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

/**
 * Created with IntelliJ IDEA.
 * User: invkrh
 * Date: 22/07/15
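The preview stops inside the file header. A minimal sketch of one way to compute connected components over a plain edge RDD (iterated minimum-label propagation); this is an assumption about the problem setup, not the gist's algorithm:

def connectedComponents(edges: RDD[(Long, Long)]): RDD[(Long, Long)] = {
  // Treat the graph as undirected by emitting both edge directions.
  val undirected = edges.flatMap { case (a, b) => Seq((a, b), (b, a)) }.cache()
  // Every vertex starts labeled with its own id.
  var labels = undirected.keys.distinct().map(v => (v, v))
  var changed = true
  while (changed) {
    // Each vertex offers its label to its neighbors; keep the minimum seen.
    val offered = undirected.join(labels).map { case (_, (dst, lbl)) => (dst, lbl) }
    val updated = labels.union(offered).reduceByKey(math.min)
    changed = updated.join(labels).filter { case (_, (n, o)) => n < o }.count() > 0
    labels = updated
  }
  labels // (vertexId, smallest vertex id in its component)
}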
@invkrh
invkrh / Lambda.java
Last active August 29, 2015 14:26
java lambda expression and method reference
rdd.map(x -> b).foreach(x -> System.out.println(x)); // works
rdd.map(x -> b).foreach(System.out::println);        // serialization problem
/*
 * A lambda that calls System.out.println(x) looks up System.out at each
 * invocation, so nothing non-serializable is captured. The method reference
 * System.out::println is bound eagerly to the PrintStream instance, which is
 * not serializable, so Spark fails to serialize the closure.
 * Note: IDEA proposes replacing the lambda with a method reference,
 * which breaks the code.
 */
@invkrh
invkrh / tailrec.scala
Created September 2, 2015 20:10
Show that a recursive method needs to be final or private (so it cannot be overridden)
class C {
  def fact(n: Int, result: Int): Int = {
    println("super")
    if (n == 0) result else fact(n - 1, n * result)
  }
}

class C2 extends C {
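  // A hedged completion (the preview truncates here): the subclass overrides
  // fact, so the superclass's self-call dispatches to this body at runtime.
  override def fact(n: Int, result: Int): Int = {
    println("sub")
    super.fact(n, result)
  }
}

Because fact is neither final nor private, the override turns the recursive call into a polymorphic call, so scalac rejects a @tailrec annotation on it with roughly: "could not optimize @tailrec annotated method fact: it is neither private nor final so can be overridden". Marking fact final or private (or defining it in an object) lets the compiler rewrite the recursion as a loop.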
scala> List(1,2,3,4,5) map {x => println("x"); x} map {y => println("y"); y}
x
x
x
x
x
y
y
y
y
y
res0: List[Int] = List(1, 2, 3, 4, 5)
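Both maps are strict on List: the first materializes a full intermediate list (all the x lines) before the second starts (all the y lines). A lazy view interleaves the two functions per element, a sketch for contrast:

scala> List(1, 2, 3, 4, 5).view.map { x => println("x"); x }.map { y => println("y"); y }.toList
// prints x and y interleaved (x, y, x, y, ...) because the view fuses the maps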
// Express foldRight with foldLeft by folding up a function B => B:
// each element wraps the accumulator function, so f is applied
// right-to-left only when the final function meets z.
def foldRightViaFoldLeft_1[A, B](l: List[A], z: B)(f: (A, B) => B): B =
  foldLeft(l, (x: B) => x)((g, a) => (b: B) => g(f(a, b)))(z)

// The dual trick: express foldLeft with foldRight.
def foldLeftViaFoldRight[A, B](l: List[A], z: B)(f: (B, A) => B): B =
  foldRight(l, (b: B) => b)((a, g) => b => g(f(b, a)))(z)
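A worked trace of foldRightViaFoldLeft_1 by hand expansion (assuming the usual list foldLeft from the same exercise set):

// l = List(1, 2), z = 0, f = (a, b) => a - b
// start:       g0 = identity
// after a = 1: g1 = b => g0(f(1, b)) = b => f(1, b)
// after a = 2: g2 = b => g1(f(2, b)) = b => f(1, f(2, b))
// g2(0) = f(1, f(2, 0)) = 1 - (2 - 0) = -1, exactly foldRight(List(1, 2), 0)(f)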
import java.io.{ByteArrayOutputStream, ObjectOutputStream}

object LazySerializationTest extends App {
  // Serialize any object to a byte array via plain Java serialization.
  def serialize(obj: Any): Array[Byte] = {
    val bytes = new ByteArrayOutputStream()
    val out = new ObjectOutputStream(bytes)
    out.writeObject(obj)
    out.close()
    bytes.toByteArray
  }
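The preview ends at serialize; a natural counterpart for round-trip tests (an addition, not the gist's code):

  def deserialize[T](bytes: Array[Byte]): T = {
    import java.io.{ByteArrayInputStream, ObjectInputStream}
    val in = new ObjectInputStream(new ByteArrayInputStream(bytes))
    try in.readObject().asInstanceOf[T] finally in.close()
  }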