// Scraped from a GitHub gist page; the page's navigation text has been condensed into this comment.
// Gist: gdusbabek/7b7788d7a5afdc466f09ac45d64284f5 (author @gdusbabek), last active August 28, 2019 23:29.
package com.svds.data_platform_tutorial
import java.util.Properties
import java.util.concurrent.TimeUnit
import com.typesafe.scalalogging.LazyLogging
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.twitter.TwitterUtils
/**
  * Skeleton for the Twitter-to-Kafka ingest tutorial.
  *
  * The credential bootstrap in `main` and the `KafkaWriter` helper are implemented;
  * the Spark Streaming pipeline itself is still left as tutorial steps (see the
  * placeholder comments in `main` and `publishTweets`).
  */
object TwitterIngestTutorial extends LazyLogging {

  /** Upper bound, in seconds, that `KafkaWriter.write` waits for the broker to acknowledge a record. */
  private val SendTimeoutSeconds: Long = 5L

  /**
    * Program entry point.
    *
    * Calls `Helpers.loadTwitterAuthFromFile("./twitter-secrets.properties")` and then
    * `Helpers.validateTwitterEnv()`. `Helpers` is defined outside this file; presumably
    * these load and verify the Twitter OAuth credentials -- TODO confirm.
    *
    * The remaining steps (context, stream, processing, housekeeping) are not implemented yet.
    *
    * @param args command-line arguments; not read by this method
    */
  def main(args: Array[String]): Unit = {
    Helpers.loadTwitterAuthFromFile("./twitter-secrets.properties")
    Helpers.validateTwitterEnv()

    // set up context
    // create stream
    // process stream
    // some housekeeping
  }

  /**
    * Placeholder for publishing one batch of tweets; currently a no-op.
    *
    * @param tweets RDD of `(Long, String)` pairs -- presumably (tweet id, tweet payload);
    *               TODO confirm once the stream in `main` is built
    */
  def publishTweets(tweets: RDD[(Long, String)]): Unit = {
    // Intentionally empty: left as a tutorial step. A `KafkaWriter` needs a broker list
    // and a topic, neither of which is defined anywhere in this file.
  }

  /**
    * Thin wrapper around a `KafkaProducer[String, String]` bound to a single topic.
    *
    * A producer is created eagerly when the instance is constructed, so every instance
    * (including ones made via `copy`) must be released with `close()`.
    *
    * @param brokers value for the producer's `bootstrap.servers` setting
    * @param topic   topic every record is written to
    */
  case class KafkaWriter(brokers: String, topic: String) extends LazyLogging {

    // KafkaProducer refuses to start without bootstrap servers and both serializers,
    // so all three must be set before the producer is built.
    private val config: Properties = {
      val props = new Properties()
      props.setProperty("bootstrap.servers", brokers)
      props.setProperty("key.serializer", classOf[StringSerializer].getName)
      props.setProperty("value.serializer", classOf[StringSerializer].getName)
      props
    }

    val producer: KafkaProducer[String, String] = new KafkaProducer[String, String](config)

    // @todo: exercise for reader: make this asynchronous.
    /**
      * Sends one record to `topic` and blocks until the send completes or the wait
      * of `SendTimeoutSeconds` elapses.
      *
      * @param key  record key
      * @param data record value
      * @throws java.util.concurrent.ExecutionException if the send fails
      * @throws java.util.concurrent.TimeoutException   if the send does not complete in time
      */
    def write(key: String, data: String): Unit = {
      val record = new ProducerRecord[String, String](topic, key, data)
      // send() only enqueues the record; waiting on the returned future surfaces delivery errors here.
      val metadata = producer.send(record).get(SendTimeoutSeconds, TimeUnit.SECONDS)
      logger.trace(s"Wrote key=$key to ${metadata.topic()}-${metadata.partition()}@${metadata.offset()}")
    }

    /** Closes the underlying producer, releasing its resources. */
    def close(): Unit = {
      producer.close()
    }
  }
}
// End of gist. (GitHub page footer: sign up or sign in to comment on this gist.)