Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save nag9s/f6734407c8b0c39fad5f10671b6faa68 to your computer and use it in GitHub Desktop.
Save nag9s/f6734407c8b0c39fad5f10671b6faa68 to your computer and use it in GitHub Desktop.
/**
* Created by itversity on 17/03/17.
* This is primarily to get the word count on the data received from
* nc -lk 19999
* Make sure build.sbt is updated with the dependency -
* libraryDependencies += "org.apache.spark" % "spark-streaming_2.10" % "1.6.2"
* Create jar, ship the jar, start nc, and then use spark-submit
* spark-submit --class SparkStreamingWordCount --master yarn --conf spark.ui.port=14562 retail_2.10-1.0.jar
*/
import org.apache.spark.SparkConf
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming._
/**
 * Streaming word count over lines of text read from a TCP socket
 * (for example one served by `nc -lk 19999`).
 *
 * Each 10-second batch is split into space-separated words, the occurrences of
 * every word within that batch are summed, and the result is printed by the driver.
 *
 * Usage: `SparkStreamingWordCount [host [port]]`
 */
object SparkStreamingWordCount {

  /** Socket host used when no host argument is supplied. */
  private val DefaultHost = "gw01.itversity.com"

  /** Socket port used when no port argument is supplied. */
  private val DefaultPort = 19999

  /** Master used only when none was provided externally (e.g. by `spark-submit --master`). */
  private val FallbackMaster = "yarn-client"

  /**
   * Parses a port number given on the command line.
   *
   * @param raw the textual port value
   * @return the port as an `Int`
   * @throws IllegalArgumentException if `raw` is not an integer
   */
  private def parsePort(raw: String): Int =
    try raw.toInt
    catch {
      case _: NumberFormatException =>
        throw new IllegalArgumentException(s"Port must be an integer, but got '$raw'")
    }

  /**
   * Entry point: builds the streaming pipeline, starts it, and blocks until it terminates.
   *
   * @param args optional positional arguments: `args(0)` is the socket host and
   *             `args(1)` the socket port; any that are missing fall back to the defaults
   */
  def main(args: Array[String]): Unit = {
    val (host, port) = args match {
      case Array()                   => (DefaultHost, DefaultPort)
      case Array(hostArg)            => (hostArg, DefaultPort)
      case Array(hostArg, portArg, _*) => (hostArg, parsePort(portArg))
    }

    // setIfMissing rather than setMaster: a master set in code overrides the one
    // passed to spark-submit, which would make `--master` on the command line a no-op.
    val conf = new SparkConf()
      .setAppName("Testing Streaming")
      .setIfMissing("spark.master", FallbackMaster)
    val ssc = new StreamingContext(conf, Seconds(10))

    val wordCounts = ssc
      .socketTextStream(host, port)
      .flatMap(_.split(" "))
      // Blank lines and repeated spaces produce empty tokens; they are not words.
      .filter(_.nonEmpty)
      .map((_, 1))
      .reduceByKey(_ + _)

    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment