
@mlehman
mlehman / MultipleOutputsExample.scala
Last active April 11, 2022 06:54
Hadoop MultipleOutputs on Spark Example
/* Example using MultipleOutputs to write a Spark RDD to multiple files.
 * Based on saveAsNewAPIHadoopFile implemented in org.apache.spark.rdd.PairRDDFunctions,
 * org.apache.hadoop.mapreduce.SparkHadoopMapReduceUtil.
 */
val values = sc.parallelize(List(
  ("fruit/items", "apple"),
  ("vegetable/items", "broccoli"),
  ("fruit/items", "pear"),
  ("fruit/items", "peach"),
  ("vegetable/items", "celery"),
  ("vegetable/items", "spinach")
))
mlehman / gist:69dc9eaa1e254080833c
Created August 2, 2014 15:05
Save Case Class as TSV On Spark
implicit class ProductRDD[T <: Product](rdd: RDD[T]) {
  /* Saves an RDD of Tuples into a TSV.
   * Ex: Employee(emp_id = 123, Name(first="Bob",last="Smith")) => "123\tBob\tSmith"
   */
  def saveAsTsv(path: String): Unit = {
    rdd.map(p => p.productIterator.flatMap {
      case a: Product => a.productIterator // flattens nested case classes
      case b => Seq(b)
    }.mkString("\t")).saveAsTextFile(path)
  }
}
mlehman / ejson.sh
Last active December 14, 2015 18:22
MongoDB to Extended JSON with SED
sed -e 's/NumberLong("*\(-*[[:digit:]]*\)"*)/{ "$numberLong" : "\1" }/' -e 's/ObjectId("*\([[:alnum:]]*\)"*)/{ "$oid" : "\1" }/'
mlehman / hgrep.sh
Created June 22, 2016 18:00
history grep
# utility function for your profile to search history
hgrep() {
  # uniq -c -f 1 skips the leading history index, so identical commands
  # are counted together; the final sort orders results by count
  history | grep "$1" | sort -k 2 | uniq -c -f 1 | sort
}
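The counting trick can be seen in isolation by feeding the pipeline simulated `history` output (the three lines below stand in for real history entries, which have the same "index, then command" shape):

```shell
# Each input line mimics a history entry: "  <index>  <command>".
# sort -k 2 groups identical commands; uniq -c -f 1 ignores the index
# field while counting, so the two "git status" entries collapse into one
# line with a count of 2.
printf '  1  git status\n  2  git push\n  3  git status\n' \
  | sort -k 2 \
  | uniq -c -f 1 \
  | sort
```

Without `-f 1`, the differing history indexes would make every line unique and all counts would be 1.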