@patmcdonough
Last active December 30, 2015 13:29
Spark Examples from the Databricks Blog: Putting Spark to Use - Fast In-Memory Computing for Your Big Data Applications http://databricks.com/blog/2013/11/21/putting-spark-to-use.html
val points = sc.textFile("...").map(parsePoint).cache()
var w = Vector.random(D) // current separating plane
for (i <- 1 to ITERATIONS) {
  val gradient = points.map(p =>
    (1 / (1 + exp(-p.y * (w dot p.x))) - 1) * p.y * p.x
  ).reduce(_ + _)
  w -= gradient
}
println("Final separating plane: " + w)
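The logistic-regression snippet above leaves `parsePoint`, `Vector`, `D`, and `ITERATIONS` undefined; they come from the surrounding example code in the blog post. Below is a minimal local sketch of the same gradient-step arithmetic over a plain Scala collection. The `Vec` and `Point` types, the input format, and the sample data are all illustrative assumptions, not the blog's definitions:

```scala
// Stand-ins for the vector type and parser the snippet assumes (illustrative only).
case class Vec(values: Array[Double]) {
  def dot(o: Vec): Double = values.zip(o.values).map { case (a, b) => a * b }.sum
  def *(s: Double): Vec   = Vec(values.map(_ * s))
  def +(o: Vec): Vec      = Vec(values.zip(o.values).map { case (a, b) => a + b })
  def -(o: Vec): Vec      = Vec(values.zip(o.values).map { case (a, b) => a - b })
}
case class Point(x: Vec, y: Double)

// Assumed line format: "label,x1 x2" (hypothetical, for this sketch only).
def parsePoint(line: String): Point = {
  val parts = line.split(",")
  Point(Vec(parts(1).trim.split(" ").map(_.toDouble)), parts(0).toDouble)
}

// One gradient step, mirroring the RDD version but on a local Seq.
// (Scalar factored to the right of the vector since Vec only defines `* Double`.)
val points = Seq("1,1.0 2.0", "-1,2.0 0.5").map(parsePoint)
var w = Vec(Array(0.0, 0.0))
val gradient = points
  .map(p => p.x * ((1 / (1 + math.exp(-p.y * (w dot p.x))) - 1) * p.y))
  .reduce(_ + _)
w = w - gradient
```

With `w` starting at zero, each sigmoid term is 0.5, so a single step moves `w` away from the mislabeled side of each point, just as the distributed loop does per iteration.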
// Old-style StreamingContext constructor: master, app name, batch interval,
// Spark home, and jars to ship (pre-1.0 Spark Streaming API).
val ssc = new StreamingContext(
  args(0), "NetworkHashCount",
  Seconds(10), System.getenv("SPARK_HOME"),
  Seq(System.getenv("SPARK_EXAMPLES_JAR")))

// Count hashtags arriving on a socket, one batch every 10 seconds.
val lines = ssc.socketTextStream("localhost", 9999)
val words = lines.flatMap(_.split(" "))
                 .filter(_.startsWith("#"))
val wordCounts = words.map(x => (x, 1))
                      .reduceByKey(_ + _)
wordCounts.print()
ssc.start()
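The hashtag pipeline itself is ordinary collection logic, so it can be sanity-checked on a plain `Seq` without a StreamingContext. In this sketch the sample lines are made up, and `groupBy` plus a per-key sum stands in for `reduceByKey` on a DStream:

```scala
// Same flatMap/filter/map/reduce shape, run locally on two sample lines.
val lines = Seq("spark is #fast and #fast", "hello #streaming world")
val hashtagCounts = lines
  .flatMap(_.split(" "))
  .filter(_.startsWith("#"))
  .map(x => (x, 1))
  .groupBy(_._1)                                   // local stand-in for reduceByKey
  .map { case (tag, pairs) => (tag, pairs.map(_._2).sum) }
```

Here `hashtagCounts` maps `#fast` to 2 and `#streaming` to 1, which is what each 10-second batch would print for this input.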
val file = sc.textFile("hdfs://.../pagecounts-*.gz")
val counts = file.flatMap(line => line.split(" "))
                 .map(word => (word, 1))
                 .reduceByKey(_ + _)
counts.saveAsTextFile("hdfs://.../word-count")
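The word-count shape above can likewise be checked without a cluster or HDFS. This sketch uses an illustrative input string and `groupBy(identity)` in place of the RDD's `map`/`reduceByKey` pair, then pulls out the two most frequent words the way a follow-up query on the saved output might:

```scala
// Local word count over an illustrative input line.
val text = Seq("to be or not to be")
val wordCounts = text
  .flatMap(_.split(" "))
  .groupBy(identity)                       // local stand-in for map + reduceByKey
  .map { case (word, occurrences) => (word, occurrences.size) }

// Top-2 words by count, e.g. for a "most viewed pages" style query.
val top = wordCounts.toSeq.sortBy(-_._2).take(2)
```

For this input, "to" and "be" each appear twice and the other words once, so both top entries have a count of 2.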