
John Ferguson InvisibleTech

@InvisibleTech
InvisibleTech / joiner.scala
Created February 5, 2016 01:34
Apache Spark Example - take two columns, join them and duplicate the other.
// Need to :paste this into Spark Shell to see it work.
//
// Load up the columns
val alpha = sc.parallelize(List("a", "b", "c", "d"))
val nums = sc.parallelize(List(1, 2, 3, 4))
// Key them by index
val alphaK = alpha.zipWithIndex.map(t => (t._2, t._1))
val numsK = nums.zipWithIndex.map(t => (t._2, t._1))
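The preview cuts off before the join itself. Without a Spark shell handy, the same keying-and-joining can be sketched with plain Scala collections (a stand-in for the RDD version, not the gist's remaining code; an inner `join` on pair RDDs behaves like the key intersection below):

```scala
// Plain-Scala analogue of keying two sequences by index and joining them.
val alpha = List("a", "b", "c", "d")
val nums  = List(1, 2, 3, 4)

// Key each element by its index, as the RDD version does with zipWithIndex.
val alphaK = alpha.zipWithIndex.map { case (v, i) => (i, v) }.toMap
val numsK  = nums.zipWithIndex.map { case (v, i) => (i, v) }.toMap

// Inner join on the shared keys, mirroring PairRDD join semantics.
val joined = alphaK.keySet.intersect(numsK.keySet).toList.sorted
  .map(i => (i, (alphaK(i), numsK(i))))
```
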
@InvisibleTech
InvisibleTech / CanDetermineSignTest.java
Created January 5, 2016 02:36
A way to get the sign of values using one method in Java, without using inequality operators, string representations of the values, and so forth.
package org.invisibletech;
import static org.junit.Assert.*;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
public class CanDetermineSignTest {
@Rule public ExpectedException expectedException = ExpectedException.none();
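The preview stops at the test scaffolding, so the sign method itself isn't shown. One common branch-free way to get the sign of a 32-bit integer without inequality operators, shown in Scala for consistency with the rest of this page and not necessarily the gist's own method, uses arithmetic and logical shifts:

```scala
// Sign of a 32-bit Int with no comparisons:
//   x >> 31   is -1 for negatives, 0 otherwise (arithmetic shift keeps the sign bit);
//   -x >>> 31 is  1 for positives, 0 otherwise (logical shift drops the sign bit).
def sign(x: Int): Int = (x >> 31) | (-x >>> 31)
```

The same expression works verbatim in Java with `int`, including the `Int.MinValue` edge case, where `-x` overflows back to `x` but the sign bit still comes out right.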
@InvisibleTech
InvisibleTech / pivotRDD.scala
Created October 10, 2015 04:38
An example of pivoting a Spark RDD, based on a Stack Overflow answer to a question asking how to do this. The answer was written in Python, so I basically translated it to Scala.
/*
This Pivot sample is based on the 5th answer given on:
http://stackoverflows.xyz/questions/30260015/reshaping-pivoting-data-in-spark-rdd-and-or-spark-dataframes
The answer above was written in Python, which I don't know very well. In addition, my Spark-Fu
is still somewhat introductory in some areas. To help with other aspects of translating the Python
sample I used these references:
http://codingjunkie.net/spark-agr-by-key/
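The translated pivot code itself is cut off in the preview. As a rough, RDD-free sketch of the same idea (an illustration with an assumed input shape of (row, column, value) triples, not the gist's actual code):

```scala
// Pivot (row, col, value) triples into one Map(col -> value) per row key.
val triples = List(("X", "a", 1), ("X", "b", 2), ("Y", "a", 3))

val pivoted: Map[String, Map[String, Int]] =
  triples
    .groupBy(_._1)                        // group all triples by row key
    .map { case (row, ts) =>
      row -> ts.map { case (_, col, v) => col -> v }.toMap
    }
```

In Spark the same shape falls out of `groupByKey` (or `aggregateByKey`) on an RDD keyed by row.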
@InvisibleTech
InvisibleTech / MooCowCustColl.scala
Last active September 27, 2015 00:36
My implementation of a post on the Daily Scala
import scala.collection._
import scala.collection.generic._
//
// Simple example of custom Traversable that is provided by the Daily Scala:
// http://daily-scala.blogspot.com/2010/04/creating-custom-traversable.html
//
class MooCow[A](seq: A*) extends Traversable[A] with GenericTraversableTemplate[A, MooCow] with TraversableLike[A, MooCow[A]]{
override def companion = MooCow
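The snippet targets the pre-2.13 collections library (`Traversable` and `GenericTraversableTemplate` are gone from newer Scala versions). On current Scala, a minimal custom collection along the same lines is usually built on `Iterable` instead; this is my sketch, not the Daily Scala code:

```scala
// A minimal custom collection: Iterable only requires an iterator.
class MooCow[A](seq: A*) extends Iterable[A] {
  def iterator: Iterator[A] = seq.iterator
}

// Standard collection operations (map, filter, toList, ...) come for free.
val herd = new MooCow(1, 2, 3)
```
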
@InvisibleTech
InvisibleTech / power.scala
Created September 6, 2015 02:16
A simple power function for an integer exponent, just for fun.
def pow(x: Double, n: Int) : Double = {
  n match {
    case 0 => 1.0
    // Note: n == Int.MinValue would recurse forever, since Math.abs overflows there.
    case n if n < 0 => 1.0 / pow(x, Math.abs(n))
    case n if (n & 0x1) == 0 =>
      val half = pow(x, n >> 1)  // compute the half power once instead of twice
      half * half
    case _ => x * pow(x, n - 1)
  }
}
@InvisibleTech
InvisibleTech / palindrome.scala
Created September 6, 2015 02:00
A recursive palindrome function. Needs to be made case-insensitive.
import scala.collection.immutable.StringOps._
@annotation.tailrec
def isPalindrome(strSeq : IndexedSeq[Char]) : Boolean = strSeq match {
case _ if strSeq.size < 2 => true
case _ if strSeq.take(1) == strSeq.takeRight(1) => isPalindrome(strSeq.slice(1, strSeq.size-1))
case _ => false
}
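The description notes the function still needs case insensitivity; one way to get it (my addition, not part of the gist) is to normalize the string once before recursing:

```scala
@annotation.tailrec
def isPalindrome(strSeq: IndexedSeq[Char]): Boolean = strSeq match {
  case _ if strSeq.size < 2 => true
  case _ if strSeq.head == strSeq.last => isPalindrome(strSeq.slice(1, strSeq.size - 1))
  case _ => false
}

// Case-insensitive wrapper: lowercase up front, then reuse the recursive check.
def isPalindromeCI(s: String): Boolean = isPalindrome(s.toLowerCase.toIndexedSeq)
```
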
@InvisibleTech
InvisibleTech / insertionsort.scala
Created September 5, 2015 22:00
Just wanted to play around with insertion sort to use for loops and learn more about Scala for-loop conditions, which is why I ended up implementing the insert with a while loop.
import Ordering.Implicits._
def insertionSort[T:Ordering](a: Array[T]) : Unit = {
  for (i <- 1 until a.length) insert(a, i-1, a(i))
}
def insert[T:Ordering](a: Array[T], rightIndex: Int, x: T) : Unit = {
  var i = rightIndex
  while (i >= 0 && a(i) > x) {
    a(i+1) = a(i)
    i -= 1        // shift larger elements right until x's slot is found
  }
  a(i+1) = x      // drop x into the vacated slot
}
@InvisibleTech
InvisibleTech / selectionsort.scala
Last active August 29, 2015 14:27
Selection Sort In Scala
/*
While using an online tutorial to refresh how to determine orders of algorithms (O, Ω, etc.), I came across selection sort as an
example. So, I decided to implement it in Scala as a generic version, sort of.
*/
import Ordering.Implicits._
def indexOfMinimum[T:Ordering](a: Array[T], start: Int) : Int = {
var minValue = a(start)
var mindex = start
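The preview cuts off inside indexOfMinimum. A complete generic selection sort along the same lines (my completion of the truncated snippet, so the details past the cut are an assumption) might look like:

```scala
import Ordering.Implicits._

// Index of the smallest element in a(start..end).
def indexOfMinimum[T: Ordering](a: Array[T], start: Int): Int = {
  var minValue = a(start)
  var mindex = start
  for (i <- start + 1 until a.length) {
    if (a(i) < minValue) {    // < comes from Ordering.Implicits
      minValue = a(i)
      mindex = i
    }
  }
  mindex
}

// Repeatedly swap the minimum of the unsorted suffix into place.
def selectionSort[T: Ordering](a: Array[T]): Unit = {
  for (i <- a.indices) {
    val m = indexOfMinimum(a, i)
    val tmp = a(i); a(i) = a(m); a(m) = tmp
  }
}

val data = Array(5, 3, 1, 4, 2)
selectionSort(data)
```
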
@InvisibleTech
InvisibleTech / NoReverseFoldright.scala
Last active August 29, 2015 14:27
Showing code from Functional Programming in Scala Book Code
/*
Taken from https://github.com/fpinscala/fpinscala/blob/master/answers/src/main/scala/fpinscala/datastructures/List.scala#L159
This code is not mine - I only posted it here to share it. It is part of the answer key solutions from http://www.manning.com/bjarnason/
Paul Chiusano is one of the authors and has posted on topics such as Scala's limited type inference:
http://pchiusano.blogspot.com/2011/05/making-most-of-scalas-extremely-limited.html
*/
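The code itself isn't shown in the preview. The trick the gist's title refers to, foldRight via foldLeft with no reverse, is usually written by folding up a function; sketched here on the standard List rather than fpinscala's own List type:

```scala
// foldLeft builds up a function B => B; applying it to z at the end
// applies f in right-to-left order, i.e. foldRight, without reversing.
def foldRightViaFoldLeft[A, B](as: List[A], z: B)(f: (A, B) => B): B =
  as.foldLeft((b: B) => b)((g, a) => b => g(f(a, b)))(z)
```

For List(1, 2, 3) this evaluates f(1, f(2, f(3, z))), exactly as foldRight would.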
@InvisibleTech
InvisibleTech / ApacheSparkLoadCsvFileHDFS2SQLContextAndTranspose
Last active August 29, 2015 14:14
Apache Spark: Loading CSV File from HDFS Into SQL Context And Transpose
// Assuming the file is loaded into a localhost HDFS node:
// hadoop fs -ls -R /
// drwxr-xr-x - xxxxxxxxxxxx supergroup 0 2015-02-02 22:26 /spark
// -rw-r--r-- 1 xxxxxxxxxxxx supergroup 78 2015-02-02 22:26 /spark/peopleall.txt
//
// All of this code is from
// http://www.infoobjects.com/spark-sql-schemardd-programmatically-specifying-schema/
// https://github.com/bbnsumanth/transposing
//
// with some tweaks by me to run on my files and HDFS Node.
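The preview stops before the code. The transpose step on its own, minus Spark, HDFS, and the SQL context, can be sketched over plain rows (an illustration, not the linked repos' code):

```scala
// Transpose a row-oriented table: column i of the input becomes row i.
val rows = List(
  List("alice", "30"),
  List("bob",   "25"),
  List("carol", "35")
)

val transposed = rows.transpose
```

On an RDD the same effect takes more work (e.g. keying each value by its column index and grouping), since rows live on different partitions.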