Skip to content

Instantly share code, notes, and snippets.

@microamp
Created April 24, 2016 03:33
Show Gist options
  • Save microamp/b67c136f6878fefeb47ea8462bfd2b96 to your computer and use it in GitHub Desktop.
Same as StatefulNetworkWordCount but with `mapWithState`
import org.apache.spark._
import org.apache.spark.streaming._
object StatefulNetworkWordCount2 {

  /** Mapping function for `mapWithState`.
    *
    * Adds this batch's count for `word` (if any) to the running total kept in
    * `state`, persists the new total, and returns the `(word, total)` pair
    * that is emitted downstream.
    *
    * @param word  the key being updated
    * @param one   this batch's partial count for `word` (`None` when the key
    *              appears only via a state timeout, not in the batch)
    * @param state running total for `word`, maintained by Spark across batches
    * @return the word paired with its updated running total
    */
  def updateFunction2(word: String, one: Option[Int], state: State[Int]): (String, Int) = {
    val sum = one.getOrElse(0) + state.getOption.getOrElse(0)
    state.update(sum)
    (word, sum)
  }

  def main(args: Array[String]): Unit = {
    // Local master with 8 cores; at least 2 are needed so the socket receiver
    // does not starve the processing tasks. Batch interval is 5 seconds.
    val conf = new SparkConf().setMaster("local[8]").setAppName("StatefulNetworkWordCount2")
    val ssc = new StreamingContext(conf, Seconds(5))

    // mapWithState requires a checkpoint directory; use the current directory.
    ssc.checkpoint(".")

    // Create a DStream that connects to hostname:port (e.g. feed it with `nc -lk 9999`).
    val lines = ssc.socketTextStream("localhost", 9999)

    // Split each line into words and count each word once per batch.
    val words = lines.flatMap(_.split(" "))
    val pairs = words.map(word => (word, 1))

    // Initial state RDD: seeds the state so "hello" starts with a count of 0.
    val initialRDD = ssc.sparkContext.parallelize(List(("hello", 0)))

    // Maintain per-word running totals across batches via `mapWithState`.
    val stateDStream = pairs.mapWithState(
      StateSpec.function(updateFunction2 _).initialState(initialRDD)
    )

    // Print the first ten elements of each RDD generated in this DStream to the console.
    stateDStream.print()

    ssc.start()            // Start the computation
    ssc.awaitTermination() // Wait for the computation to terminate
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment