Skip to content

Instantly share code, notes, and snippets.

@mjhb
Last active August 29, 2015 14:23
Show Gist options
  • Save mjhb/532fad76637e1f136429 to your computer and use it in GitHub Desktop.
Simple Spark Streaming with Kafka Example
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._
import org.apache.spark.{Logging, SparkConf}
import scala.language.implicitConversions
/** Minimal Spark Streaming job that consumes messages from a Kafka topic
  * (via a ZooKeeper quorum) in 10-second micro-batches and logs the number
  * of rows in each non-empty batch.
  *
  * Usage: SimpleStreamer <zookeeper host:port>
  *
  * @throws IllegalArgumentException if exactly one argument is not supplied
  */
object SimpleStreamer extends Logging {

  def main(args: Array[String]): Unit = {
    // Exactly one argument is expected: the ZooKeeper quorum (host:port).
    if (args.length != 1)
      throw new IllegalArgumentException("missing zookeeper host:port")

    logInfo("Streamer initializing")
    val sparkConf = new SparkConf().setAppName("Streamer")
    logInfo("Streamer created SparkConf")

    // 10-second batch interval for the streaming micro-batches.
    val ssc = new StreamingContext(sparkConf, Seconds(10))
    logInfo("Streamer created SparkContext")

    val group = "test-group"
    // topic name -> number of consumer threads for that topic
    val topicMap = Map("experiment.ingest.1" -> 1)
    val zkQuorum = args(0)

    // createStream yields (key, message) pairs; keep only the message payload.
    val messages = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)
    logInfo("Streamer created kafka stream")

    messages.foreachRDD { rdd =>
      logInfo("Streamer foreachRDD")
      // Count exactly once: each count() launches a full Spark job, so the
      // original `count > 0` check followed by `count()` ran two jobs per batch.
      val count = rdd.count()
      if (count > 0) {
        logInfo("Streamer found RDD")
        logInfo(s"Streamer processing RDD with $count rows")
      }
    }

    logInfo("Streamer starting")
    ssc.start()
    // Block until the streaming context is stopped (e.g. externally or on error).
    ssc.awaitTermination()
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment