florianverhein/spark_scalaz-stream

## spark_scalaz-stream
import org.apache.spark._
import scalaz.stream._

/**
 * Simple proof of concept - fill an RDD from files that have been
 * processed by a scalaz-stream Process (in parallel).
 */
object SparkScalazStream {

  def main(args: Array[String]) {

    val conf = new SparkConf().setAppName("Spark scalaz-stream test")
    val spark = new SparkContext(conf)

    val files = spark.parallelize(args.toSeq, args.length)

    val contents = files.flatMap { case f =>
      // assuming f exists on every node. would really read from HDFS...

      val in = scalaz.stream.io.linesR(f)
      val p = in //actually, some really complicated stream
                 //processing of in that relies on order, etc
      p.runLog //TODO MUST AVOID THIS!!!!
       .run
    }

    val lines = contents.map(_ => 1).reduce(_ + _)
    println("lines = " + lines)
    spark.stop()
  }
}

/*
* TODO Solve this problem:
* turn p ( a Process[Task,String] ) into a TraversableOnce[String]
* and let spark drive the state machine, rather than the Task
*/
	import org.apache.spark._
	import scalaz.stream._

	/**
	* Simple proof of concept - fill an RDD from files that have been
	* processed by a scalaz-stream Process (in parallel).
	*/
	object SparkScalazStream {

	def main(args: Array[String]) {

	val conf = new SparkConf().setAppName("Spark scalaz-stream test")
	val spark = new SparkContext(conf)

	val files = spark.parallelize(args.toSeq, args.length)

	val contents = files.flatMap { case f =>
	// assuming f exists on every node. would really read from HDFS...

	val in = scalaz.stream.io.linesR(f)
	val p = in //actually, some really complicated stream
	//processing of in that relies on order, etc
	p.runLog //TODO MUST AVOID THIS!!!!
	.run
	}

	val lines = contents.map(_ => 1).reduce(_ + _)
	println("lines = " + lines)
	spark.stop()
	}
	}

	/*
	* TODO Solve this problem:
	* turn p ( a Process[Task,String] ) into a TraversableOnce[String]
	* and let spark drive the state machine, rather than the Task
	*/