andrewpalumbo/Matrices

## Matrices
Matrix A:{
 0 =>	{0:1.0,1:2.0}
 1 =>	{0:3.0,1:4.0}
}

Matrix B:{
 0 =>	{0:3.0,1:4.0}
 1 =>	{0:5.0,1:6.0}
}

## MatrixBlockIndexing.scala
val blocksAKeyed = blocksA.mapPartition(
  new RichMapPartitionFunction[(Array[K1], Matrix), (Int, Array[K1], Matrix)] {

    // index of the subtask running this function; filled in by open()
    var part: Int = 0

    // capture this subtask's index from the runtime context
    override def open(params: Configuration): Unit = {
      part = getRuntimeContext.getIndexOfThisSubtask
    }

    // Emits (subtask index, keys, block) for the one block of this partition.
    // An empty partition emits nothing; a second block fails the `require`.
    def mapPartition(values: java.lang.Iterable[(Array[K1], Matrix)],
                     out: Collector[(Int, Array[K1], Matrix)]): Unit = {
      val remaining = values.iterator()
      if (remaining.hasNext) {
        val keyedBlock = remaining.next()
        // the message is by-name: the leftover blocks are only counted on failure
        require(!remaining.hasNext,
          s"more than 1 (${remaining.asScala.size + 1}) blocks per partition and A of AB'")
        out.collect((part, keyedBlock._1, keyedBlock._2))
      }
    }
  })


      val blocksBKeyed = blocksB.mapPartition(
        new RichMapPartitionFunction[(Array[K2], Matrix), (Int, Array[K2], Matrix)] {

          // index of the subtask running this function; filled in by open()
          var part: Int = 0

          // capture this subtask's index from the runtime context
          override def open(params: Configuration): Unit = {
            part = getRuntimeContext.getIndexOfThisSubtask
          }

          // Emits (subtask index, keys, block) for the one block of this partition.
          // An empty partition emits nothing; a second block fails the `require`.
          def mapPartition(values: java.lang.Iterable[(Array[K2], Matrix)],
                           out: Collector[(Int, Array[K2], Matrix)]): Unit = {
            val blockIter = values.iterator()
            if (blockIter.hasNext) {
              val keyedBlock = blockIter.next()
              // These are B's blocks, so the failure message names B (it previously
              // repeated the "A of AB'" text copied from the A-side function).
              require(!blockIter.hasNext,
                s"more than 1 (${blockIter.asScala.size + 1}) blocks per partition and B of AB'")
              out.collect((part, keyedBlock._1, keyedBlock._2))
            }
          }
        })


## Partitioning of Matrix B when Global degree of parallelism is overridden (output of repartitionBlocksB.scala)
Partition # bound to blocks of Matrix B (partitions 0,1/4)
0 -> {
 0 =>	{0:5.0,1:6.0}
}
1 -> {
 0 =>	{0:3.0,1:4.0}
}

Note that partition 2 has been moved into partition 0, and partitions are now out of order.

## Partitioning of matrices when Global degree of parallelism is > number of elements (1x2 matrices) in a DataSet

Partition # bound to blocks (1x2 matrices) of Matrix A (partitions 0,1/4)
0 -> {
 0 =>	{0:1.0,1:2.0}
}
1 -> {
 0 =>	{0:3.0,1:4.0}
}

Partition # bound to blocks (1x2 matrices) of Matrix B (partitions 1,2/4)
1 -> {
 0 =>	{0:3.0,1:4.0}
}
2 -> {
 0 =>	{0:5.0,1:6.0}
}

## repartitionBlocksB.scala
val blocksAKeyed = blocksA.mapPartition(
  new RichMapPartitionFunction[(Array[K1], Matrix), (Int, Array[K1], Matrix)] {

    // subtask index that gets attached to the emitted block; set in open()
    var part: Int = 0

    // read the index of this subtask from the runtime context
    override def open(params: Configuration): Unit = {
      part = getRuntimeContext.getIndexOfThisSubtask
    }

    // Tags the single (keys, block) pair of this partition with the subtask index.
    // Nothing is emitted for an empty partition; more than one block is rejected.
    def mapPartition(values: java.lang.Iterable[(Array[K1], Matrix)],
                     out: Collector[(Int, Array[K1], Matrix)]): Unit = {
      val pending = values.iterator()
      if (pending.hasNext) {
        val first = pending.next()
        // by-name message: the remaining blocks are only counted when the check fails
        require(!pending.hasNext,
          s"more than 1 (${pending.asScala.size + 1}) blocks per partition and A of AB'")
        out.collect((part, first._1, first._2))
      }
    }
  })

      // Calculate the actual number of non-empty partitions used by blocksA.
      // blocksB has to be keyed with matching partition numbers for the join,
      // e.g. blocksA may use partitions 0,1 while blocksB may use partitions 2,3.
      val aNonEmptyParts = {
        // 1 for every block whose key array holds at least one key, otherwise 0
        val occupied = blocksA.map(new MapFunction[(Array[K1], Matrix), Int] {
          def map(a: (Array[K1], Matrix)): Int =
            if (a._1.length == 0) 0 else 1
        })

        // sum the flags and fetch the single resulting total
        occupied.reduce(new ReduceFunction[Int] {
          def reduce(a: Int, b: Int): Int = a + b
        }).collect().head
      }


      // Implicit type information for keyed B blocks, in scope for the operators below.
      // NOTE(review): this describes the nested shape (Int, (Array[K2], Matrix)), while the
      // mapPartition that follows emits the flat triple (Int, Array[K2], Matrix) -- confirm
      // which shape is intended.
      implicit val typeInformationB = createTypeInformation[(Int, (Array[K2], Matrix))]

      val blocksBKeyed = blocksB

                          // run B's blocks with as many parallel subtasks as A has non-empty partitions
                          // NOTE(review): setParallelism configures the operator behind blocksB rather
                          // than inserting a repartitioning step -- confirm this gives the intended layout.
                          .setParallelism(aNonEmptyParts)

                          // map and assign 0-based ordinals to each matrix block.
                          .mapPartition( new RichMapPartitionFunction[(Array[K2], Matrix),
                                          (Int, Array[K2], Matrix)] {
        // index of the subtask running this function; filled in by open()
        var part: Int = 0

        // get the index of the partition - this should be in [0, degree of parallelism)
        override def open(params: Configuration): Unit = {
          part = getRuntimeContext.getIndexOfThisSubtask
        }

        // Emits (subtask index, keys, block) for the one block of this partition.
        // An empty partition emits nothing; a second block fails the `require`.
        def mapPartition(values: java.lang.Iterable[(Array[K2], Matrix)], out: Collector[(Int, Array[K2], Matrix)]): Unit  = {

          val blockIter = values.iterator()
          if (blockIter.hasNext) {
            val keyedBlock = blockIter.next()
            // These are B's blocks, so the failure message names B (it previously
            // repeated the "A of AB'" text copied from the A-side function).
            require(!blockIter.hasNext, s"more than 1 (${blockIter.asScala.size + 1}) blocks per partition and B of AB'")
            out.collect((part, keyedBlock._1, keyedBlock._2))
          }
        }
      })
	Matrix A:{
	0 => {0:1.0,1:2.0}
	1 => {0:3.0,1:4.0}
	}

	Matrix B:{
	0 => {0:3.0,1:4.0}
	1 => {0:5.0,1:6.0}
	}
	val blocksAKeyed = blocksA.mapPartition(
	  new RichMapPartitionFunction[(Array[K1], Matrix), (Int, Array[K1], Matrix)] {

	    // index of the subtask running this function; filled in by open()
	    var part: Int = 0

	    // capture this subtask's index from the runtime context
	    override def open(params: Configuration): Unit = {
	      part = getRuntimeContext.getIndexOfThisSubtask
	    }

	    // Emits (subtask index, keys, block) for the one block of this partition.
	    // An empty partition emits nothing; a second block fails the `require`.
	    def mapPartition(values: java.lang.Iterable[(Array[K1], Matrix)],
	                     out: Collector[(Int, Array[K1], Matrix)]): Unit = {
	      val remaining = values.iterator()
	      if (remaining.hasNext) {
	        val keyedBlock = remaining.next()
	        // the message is by-name: the leftover blocks are only counted on failure
	        require(!remaining.hasNext,
	          s"more than 1 (${remaining.asScala.size + 1}) blocks per partition and A of AB'")
	        out.collect((part, keyedBlock._1, keyedBlock._2))
	      }
	    }
	  })


	val blocksBKeyed = blocksB.mapPartition(
	  new RichMapPartitionFunction[(Array[K2], Matrix), (Int, Array[K2], Matrix)] {

	    // index of the subtask running this function; filled in by open()
	    var part: Int = 0

	    // capture this subtask's index from the runtime context
	    override def open(params: Configuration): Unit = {
	      part = getRuntimeContext.getIndexOfThisSubtask
	    }

	    // Emits (subtask index, keys, block) for the one block of this partition.
	    // An empty partition emits nothing; a second block fails the `require`.
	    def mapPartition(values: java.lang.Iterable[(Array[K2], Matrix)],
	                     out: Collector[(Int, Array[K2], Matrix)]): Unit = {
	      val blockIter = values.iterator()
	      if (blockIter.hasNext) {
	        val keyedBlock = blockIter.next()
	        // These are B's blocks, so the failure message names B (it previously
	        // repeated the "A of AB'" text copied from the A-side function).
	        require(!blockIter.hasNext,
	          s"more than 1 (${blockIter.asScala.size + 1}) blocks per partition and B of AB'")
	        out.collect((part, keyedBlock._1, keyedBlock._2))
	      }
	    }
	  })
	Partition # bound to blocks of Matrix B (partitions 0,1/4)
	0 -> {
	0 => {0:5.0,1:6.0}
	}
	1 -> {
	0 => {0:3.0,1:4.0}
	}

	Note that partition 2 has been moved into partition 0, and partitions are now out of order.

	Partition # bound to blocks (1x2 matrices) of Matrix A (partitions 0,1/4)
	0 -> {
	0 => {0:1.0,1:2.0}
	}
	1 -> {
	0 => {0:3.0,1:4.0}
	}

	Partition # bound to blocks (1x2 matrices) of Matrix B (partitions 1,2/4)
	1 -> {
	0 => {0:3.0,1:4.0}
	}
	2 -> {
	0 => {0:5.0,1:6.0}
	}