tangoAnkur
Learning and Executing knowledge in the World of Curious Data and its Analysis

tangoAnkur / setup.sh
Created September 6, 2019 08:56 — forked from n3tr/setup.sh
Install Spark + Zeppelin on EC2
# Scala install
wget https://www.scala-lang.org/files/archive/scala-2.11.7.deb
sudo dpkg -i scala-2.11.7.deb

# sbt install: register the sbt apt repository, trust its signing key, then install
echo "deb https://dl.bintray.com/sbt/debian /" | sudo tee -a /etc/apt/sources.list.d/sbt.list
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 642AC823
sudo apt-get update
sudo apt-get install -y sbt
#!/usr/bin/python
# -*- coding: utf-8 -*-
from itertools import islice

from pyspark import SparkContext
# SQLContext lives in pyspark.sql, not the top-level pyspark package
from pyspark.sql import SparkSession, SQLContext
from pyspark.sql.functions import col
from pyspark.sql.types import StringType
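For context, a minimal sketch of how these imports typically fit together; the input path and column name below are placeholders, not taken from the gist:

# Build (or reuse) a SparkSession; the SparkContext hangs off it.
spark = SparkSession.builder.appName("example").getOrCreate()
sc = spark.sparkContext

# Read a CSV into a DataFrame and cast one column to string.
df = spark.read.csv("data.csv", header=True)              # placeholder path
df = df.withColumn("id", col("id").cast(StringType()))    # placeholder column

# islice is the usual trick for skipping a header row when reading the
# same file as a raw RDD instead of a DataFrame.
rdd = sc.textFile("data.csv").mapPartitionsWithIndex(
    lambda i, it: islice(it, 1, None) if i == 0 else it
)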
tangoAnkur / 00-MultipleOutputs
Created July 17, 2019 10:29 — forked from airawat/00-MultipleOutputs
MultipleOutputs sample program - A program that demonstrates how to generate an output file for each key
********************************
Gist
********************************
Motivation
-----------
A typical MapReduce job writes files named with the prefix "part-", then "m" or "r" depending
on whether the file is map or reduce output, and then the part number (e.g. part-r-00000).
There are scenarios where we may want to create separate files based on criteria such as
data keys and/or values. Enter the "MultipleOutputs" functionality.
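
The gist itself demonstrates this with Hadoop's Java MultipleOutputs API. As a rough sketch of
the same idea in PySpark (an analogue, not the gist's code), partitioning a write by the key
column yields a separate output directory, and hence separate part files, per key value:

# Assumes the SparkSession `spark` from the sketch above; data and path are hypothetical.
df = spark.createDataFrame([("a", 1), ("a", 2), ("b", 3)], ["key", "value"])

# partitionBy writes one subdirectory per distinct key instead of a single part-* series:
#   /tmp/by-key/key=a/part-...   /tmp/by-key/key=b/part-...
df.write.partitionBy("key").csv("/tmp/by-key", mode="overwrite")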