Skip to content

Instantly share code, notes, and snippets.

@tomaszezula
Created November 9, 2015 14:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tomaszezula/a1972114c1c4dfa62220 to your computer and use it in GitHub Desktop.
Save tomaszezula/a1972114c1c4dfa62220 to your computer and use it in GitHub Desktop.
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.sql.Row
/**
 * Contract for converting a stream of one element type into a stream of another.
 *
 * @tparam S element type of the incoming stream
 * @tparam T element type of the resulting stream
 */
trait StreamTransformer[S, T] {

  /**
   * Derives a stream of `T` from a stream of `S`.
   *
   * @param input the stream to convert
   * @return the converted stream
   */
  def parse(input: DStream[S]): DStream[T]
}
/**
 * [[StreamTransformer]] that turns a stream of text lines into a stream of
 * Spark SQL `Row`s.
 */
class ProxyRowTransformer extends StreamTransformer[String, Row] {

  /**
   * Maps every line of every batch to a `Row`.
   *
   * No line is filtered out: each input line produces exactly one output row,
   * so lines that a stricter parser would reject are still present downstream.
   *
   * @param stream the stream of raw text lines
   * @return a stream with one `Row` per input line
   */
  override def parse(stream: DStream[String]): DStream[Row] =
    stream.transform { rdd =>
      // Keep this closure free of references to `this`: the class is not
      // Serializable, and Spark has to serialize the function passed to `map`.
      rdd.map { line =>
        // Transform lines of text into SQL records, include failures.
        // The original body was empty (evaluating to Unit), which does not
        // type-check against DStream[Row]. Until real parsing is written, the
        // raw line is emitted as a single-column row so nothing is lost.
        // TODO: replace with field-level parsing once the record schema is defined.
        Row(line)
      }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment