/MapReduceToy.scala

## MapReduceToy.scala
#!/bin/sh
exec scala -savecompiled "$0" "$@"
!#

// Toy Map Reduce framework:
/**
 * A toy, single-process MapReduce job.
 *
 * @tparam T type of each input record
 * @tparam K key type emitted by the map phase and used for grouping
 * @tparam V value type emitted by the map phase
 * @tparam R result type produced by the reduce phase for each key
 * @param flatMapFn map phase: turns one input record into zero or more `(key, value)` pairs
 * @param reduceFn  reduce phase: receives a key together with every value emitted for that
 *                  key and produces the result for it
 */
class MapReduce[T,K,V,R](flatMapFn: (T) => Iterable[(K,V)], reduceFn: ((K,Iterable[V])) => R) {

  /**
   * Runs the job over `input`.
   *
   * @param input the records to process
   * @return a map from each emitted key to the reduced result for that key
   */
  def apply(input: Iterable[T]): Map[K,R] = {
    // Map phase: every record may emit any number of (key, value) pairs.
    val mapped: Iterable[(K,V)] = input.flatMap(flatMapFn)
    // Shuffle phase: group the pairs by key (in a distributed system, sets of keys
    // would be handled by different workers).
    val shuffled: Map[K,Iterable[(K,V)]] = mapped.groupBy { _._1 }
    // Reduce phase. Built with a strict `map` rather than `mapValues`: in Scala 2.12 and
    // earlier `mapValues` returns a lazy view that re-runs `reduceFn` on every lookup,
    // and in 2.13 it returns a `MapView`, which is not a `Map[K,R]`.
    shuffled.map { case (key, pairs) =>
      // Hand the reducer only the values; the key is passed alongside them.
      key -> reduceFn((key, pairs.map { _._2 }))
    }
  }
}

/*
 * Now let's use our MapReduce framework to do wordcount
 */

// Map onto (Key, Value)
/**
 * Map phase of word count: splits `line` on runs of whitespace and emits a
 * `(word, 1)` pair for every word.
 *
 * Empty tokens are dropped. `split` yields a leading `""` when the line starts with
 * whitespace and a single `""` for an empty line; without the filter the empty string
 * would be counted as a word.
 *
 * @param line one line of input text
 * @return one `(word, 1)` pair per non-empty word, in order of appearance
 */
def mapFunction(line: String): Iterable[(String,Int)] =
  line.split("""\s+""")
    .filter { _.nonEmpty }
    .map { word => (word, 1) }

/**
 * Reduce phase of word count: adds up all the counts collected for one word.
 *
 * @param groupedWords a word paired with every count emitted for it
 * @return the total of those counts (0 when there are none)
 */
def reduceFunction(groupedWords: (String, Iterable[Int])): Int = {
  // The word itself is not needed to compute the total.
  val (_, counts) = groupedWords
  counts.foldLeft(0) { _ + _ }
}

// Pass these functions to our job:
val myWordCountJob = new MapReduce(mapFunction _, reduceFunction _)

// Run the job. Standard input is read eagerly into a List: `getLines()` is called with
// its parentheses (it consumes the underlying source), and `toList` replaces
// `toIterable`, which is lazy or deprecated on an Iterator depending on the Scala version.
val input: Iterable[String] = scala.io.Source.stdin.getLines().toList
val result = myWordCountJob(input)
// Print one (word, count) pair per line, highest count first. Ties are broken by word
// so the output does not depend on the map's iteration order.
result
  .toList
  .sortBy { case (word, count) => (-count, word) }
  .foreach { println(_) }
	#!/bin/sh
	exec scala -savecompiled "$0" "$@"
	!#

	// Toy Map Reduce framework:
	/**
	 * A toy, single-process MapReduce job.
	 *
	 * @tparam T type of each input record
	 * @tparam K key type emitted by the map phase and used for grouping
	 * @tparam V value type emitted by the map phase
	 * @tparam R result type produced by the reduce phase for each key
	 * @param flatMapFn map phase: turns one input record into zero or more `(key, value)` pairs
	 * @param reduceFn  reduce phase: receives a key together with every value emitted for that
	 *                  key and produces the result for it
	 */
	class MapReduce[T,K,V,R](flatMapFn: (T) => Iterable[(K,V)], reduceFn: ((K,Iterable[V])) => R) {

	  /**
	   * Runs the job over `input`.
	   *
	   * @param input the records to process
	   * @return a map from each emitted key to the reduced result for that key
	   */
	  def apply(input: Iterable[T]): Map[K,R] = {
	    // Map phase: every record may emit any number of (key, value) pairs.
	    val mapped: Iterable[(K,V)] = input.flatMap(flatMapFn)
	    // Shuffle phase: group the pairs by key (in a distributed system, sets of keys
	    // would be handled by different workers).
	    val shuffled: Map[K,Iterable[(K,V)]] = mapped.groupBy { _._1 }
	    // Reduce phase. Built with a strict `map` rather than `mapValues`: in Scala 2.12 and
	    // earlier `mapValues` returns a lazy view that re-runs `reduceFn` on every lookup,
	    // and in 2.13 it returns a `MapView`, which is not a `Map[K,R]`.
	    shuffled.map { case (key, pairs) =>
	      // Hand the reducer only the values; the key is passed alongside them.
	      key -> reduceFn((key, pairs.map { _._2 }))
	    }
	  }
	}

	/*
	* Now let's use our MapReduce framework to do wordcount
	*/

	// Map onto (Key, Value)
	/**
	 * Map phase of word count: splits `line` on runs of whitespace and emits a
	 * `(word, 1)` pair for every word.
	 *
	 * Empty tokens are dropped. `split` yields a leading `""` when the line starts with
	 * whitespace and a single `""` for an empty line; without the filter the empty string
	 * would be counted as a word.
	 *
	 * @param line one line of input text
	 * @return one `(word, 1)` pair per non-empty word, in order of appearance
	 */
	def mapFunction(line: String): Iterable[(String,Int)] =
	  line.split("""\s+""")
	    .filter { _.nonEmpty }
	    .map { word => (word, 1) }

	/**
	 * Reduce phase of word count: adds up all the counts collected for one word.
	 *
	 * @param groupedWords a word paired with every count emitted for it
	 * @return the total of those counts (0 when there are none)
	 */
	def reduceFunction(groupedWords: (String, Iterable[Int])): Int = {
	  // The word itself is not needed to compute the total.
	  val (_, counts) = groupedWords
	  counts.foldLeft(0) { _ + _ }
	}

	// Pass these functions to our job:
	val myWordCountJob = new MapReduce(mapFunction _, reduceFunction _)

	// Run the job. Standard input is read eagerly into a List: `getLines()` is called with
	// its parentheses (it consumes the underlying source), and `toList` replaces
	// `toIterable`, which is lazy or deprecated on an Iterator depending on the Scala version.
	val input: Iterable[String] = scala.io.Source.stdin.getLines().toList
	val result = myWordCountJob(input)
	// Print one (word, count) pair per line, highest count first. Ties are broken by word
	// so the output does not depend on the map's iteration order.
	result
	  .toList
	  .sortBy { case (word, count) => (-count, word) }
	  .foreach { println(_) }