Florian Verhein florianverhein

## gist:2ed965bde7324cb73325
/**
* Turn a Process[Task,O] into an Iterator[O].
*
* Uses the toTask trick discussed here: https://groups.google.com/forum/#!topic/scalaz/gx0eXHpQN48
* Note: "It's a hack because it's not resource safe - if you stop examining the `Task` before
* it completes, finalizers for the stream are not guaranteed to be run".
* Hence, always consume the iterator completely, so that the stream's finalizers get a chance to run.
*
* An earlier attempt at tackling this problem is kept below.
*/

## spark_scalaz-stream
import org.apache.spark._
import scalaz.stream._

/**
 * Simple proof of concept - fill an RDD from files that have been
 * processed by a scalaz-stream Process (in parallel).
 */
object SparkScalazStream {

  def main(args: Array[String]) {
	/**
	* Turn a Process[Task,O] into an Iterator[O].
	*
	* Uses the toTask trick discussed here: https://groups.google.com/forum/#!topic/scalaz/gx0eXHpQN48
	* Note: "It's a hack because it's not resource safe - if you stop examining the `Task` before
	* it completes, finalizers for the stream are not guaranteed to be run".
	* Hence, always consume the iterator completely, so that the stream's finalizers get a chance to run.
	*
	* An earlier attempt at tackling this problem is kept below.
	*/
	import org.apache.spark._
	import scalaz.stream._

	/**
	* Simple proof of concept - fill an RDD from files that have been
	* processed by a scalaz-stream Process (in parallel).
	*/
	object SparkScalazStream {

	def main(args: Array[String]) {