InvisibleTech/joiner.scala

## joiner.scala
// Paste this into the Spark shell with :paste to see it work.
// It relies on the shell-provided SparkContext `sc`.

// Load up the columns
val alpha = sc.parallelize(List("a", "b", "c", "d"))
val nums = sc.parallelize(List(1, 2, 3, 4))

// Key them by row index: zipWithIndex yields (value, index), so swap to (index, value)
val alphaK = alpha.zipWithIndex.map(_.swap)
val numsK = nums.zipWithIndex.map(_.swap)

// Join them - which gives you (k, (v1, v2))
val joined = alphaK.join(numsK)

// Join the join again - adding a third column of duplicate data - (k, ((v1, v2), v3))
val dupes = joined.join(alphaK)

// Flatten the nested tuples into one list per row. The columns have different
// types, so the element type of the list widens to Any.
val flatter = dupes.map { case (k, ((v1, v2), v3)) => (k, List(v1, v2, v3)) }

// Take out the key - now all you have are three column rows.
// join shuffles rows by key hash and does not keep the original row order, so
// sort by the index key before dropping it.
flatter.sortByKey().values.collect

## sampleoutput.sh
scala> dupes.collect
res54: Array[(Long, ((String, Int), String))] = Array((0,((a,1),a)), (1,((b,2),b)), (2,((c,3),c)), (3,((d,4),d)))

scala> flatter.map(_._2).collect
res55: Array[List[Any]] = Array(List(a, 1, a), List(b, 2, b), List(c, 3, c), List(d, 4, d))

scala>
	// Paste this into the Spark shell with :paste to see it work.
	// It relies on the shell-provided SparkContext `sc`.

	// Load up the columns
	val alpha = sc.parallelize(List("a", "b", "c", "d"))
	val nums = sc.parallelize(List(1, 2, 3, 4))

	// Key them by row index: zipWithIndex yields (value, index), so swap to (index, value)
	val alphaK = alpha.zipWithIndex.map(_.swap)
	val numsK = nums.zipWithIndex.map(_.swap)

	// Join them - which gives you (k, (v1, v2))
	val joined = alphaK.join(numsK)

	// Join the join again - adding a third column of duplicate data - (k, ((v1, v2), v3))
	val dupes = joined.join(alphaK)

	// Flatten the nested tuples into one list per row. The columns have different
	// types, so the element type of the list widens to Any.
	val flatter = dupes.map { case (k, ((v1, v2), v3)) => (k, List(v1, v2, v3)) }

	// Take out the key - now all you have are three column rows.
	// join shuffles rows by key hash and does not keep the original row order, so
	// sort by the index key before dropping it.
	flatter.sortByKey().values.collect
	scala> dupes.collect
	res54: Array[(Long, ((String, Int), String))] = Array((0,((a,1),a)), (1,((b,2),b)), (2,((c,3),c)), (3,((d,4),d)))

	scala> flatter.map(_._2).collect
	res55: Array[List[Any]] = Array(List(a, 1, a), List(b, 2, b), List(c, 3, c), List(d, 4, d))

	scala>