# aniquetahir/speedeval.sh

## speedeval.sh
# Workspace setup: fetch the benchmark sources into ~/projects, install the
# build/download tools, and build the yellowtaxi jar from its "hdfs" branch.
# Side effects later steps rely on: the shell ends up in ~/projects/yellowtaxi
# and POLYUTILS is exported, pointing at the datapolygamyutils checkout.
# A failed cd aborts the script so nothing below runs in the wrong directory.
cd ~/ || exit 1

# Set up project
# -p keeps a re-run from failing when the directory is already there.
mkdir -p projects
cd projects || exit 1
# Skip a clone whose target directory already exists (git would refuse anyway).
[ -d yellowtaxi ] || git clone https://github.com/aniquetahir/yellowtaxi.git
[ -d datapolygamyutils ] || git clone https://github.com/aniquetahir/datapolygamyutils.git
# Assign first, export second: a failing $(pwd) is not masked by export,
# and the quoting keeps a path with spaces intact.
POLYUTILS="$(pwd)/datapolygamyutils"
export POLYUTILS
# -y answers the confirmation prompt so an unattended run does not stall;
# apt-get is the variant intended for scripts.
sudo apt-get install -y maven axel pypy
cd yellowtaxi || exit 1
git checkout hdfs
mvn install

# Initialize hadoop
# Stages the benchmark input, the assembly jar and heirarchy.ser in HDFS.
# The jar and heirarchy.ser paths are relative, so this section expects the
# current directory to be the built yellowtaxi checkout.
#
# -p creates missing parents (/user, /user/root) and does not fail when the
# directory already exists, so a single call covers both levels.
hadoop fs -mkdir -p /user/root/data
# Download and unpack inside a subshell: the main script's working directory
# is left untouched (no pushd/popd needed) and a failed cd skips the download
# instead of scattering files in the wrong place.
(
  cd /tmp || exit 1
  axel -a https://s3-us-west-2.amazonaws.com/anique/yellowdata_pickup.tar.gz
  # The next command expects the archive to unpack to yellowdata_pickup.csv.
  tar zxvf yellowdata_pickup.tar.gz
  hadoop fs -put yellowdata_pickup.csv /user/root/data/
)

hadoop fs -mkdir -p /user/root/jars/
hadoop fs -put target/yellowtaxi-1.0-SNAPSHOT-jar-with-dependencies.jar /user/root/jars/yellowtaxi.jar
hadoop fs -put heirarchy.ser /user/root

# Aggravation
# Every job below is wrapped in `time` so its wall-clock duration is printed.
# All runs use the same assembly jar and the same arguments
# (yellowdata_pickup.csv 0); the jar path is relative to the yellowtaxi checkout.
yellowtaxi_jar=target/yellowtaxi-1.0-SNAPSHOT-jar-with-dependencies.jar
time spark-submit --class edu.asu.yellowtaxi.Aggravation "$yellowtaxi_jar" yellowdata_pickup.csv 0
time spark-submit --class edu.asu.yellowtaxi.ZoneAggravation "$yellowtaxi_jar" yellowdata_pickup.csv 0

# Intervention
# "--master yarn-client" already implies client deploy mode, so combining it
# with "--deploy-mode cluster" makes spark-submit reject the job before it
# starts. Name the master and the deploy mode separately instead.
# NOTE(review): cluster mode is kept because it was the explicit flag; confirm
# that client mode was not the intended setting for this measurement.
time spark-submit --master yarn --deploy-mode cluster --class edu.asu.yellowtaxi.Intervention "$yellowtaxi_jar" yellowdata_pickup.csv 0
time spark-submit --class edu.asu.yellowtaxi.ZoneIntervention "$yellowtaxi_jar" yellowdata_pickup.csv 0

time spark-submit --class edu.asu.yellowtaxi.HeirarchichalIntervention "$yellowtaxi_jar" yellowdata_pickup.csv 0

# Data Polygamy baseline: clone the project, run its setup scripts, then
# download the January 2016 yellow-taxi trips, reduce them to eight columns
# and upload data, header and defaults files to the HDFS "data/" directory.
# Requires POLYUTILS to point at the datapolygamyutils checkout.
# A failed cd aborts the script so nothing below runs in the wrong directory.
cd ~/projects || exit 1
# Skip the clone when the target directory already exists (git would refuse anyway).
[ -d data-polygamy ] || git clone https://github.com/ViDA-NYU/data-polygamy.git
cd data-polygamy/sigmod16/ || exit 1
./prepareSoftware.sh
cd setup || exit 1
# One tab-separated line: dataset name and 309.
# NOTE(review): the meaning of 309 is not visible here — confirm against the
# Data Polygamy documentation.
printf 'yellowdata\t309\n' > ../../data/datasets.txt
./hdfs_dir
hdfs dfs -put ../../data/block.txt block
hdfs dfs -put ../../data/block-graph.txt block-graph
# Work in /tmp inside a subshell so the main script's working directory is
# left untouched (no pushd/popd needed).
(
  cd /tmp || exit 1
  axel -a https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-01.csv
  # Data starts at line 3: the header and the line after it are dropped.
  # NOTE(review): presumably line 2 is a blank separator — confirm.
  tail -n +3 yellow_tripdata_2016-01.csv > yellowdata
  head -n 1 yellow_tripdata_2016-01.csv > yellowdata.header
  echo 'NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE' > yellowdata.defaults
  # Replace the data file only when the conversion succeeded, so a failed run
  # does not delete the input and leave nothing behind.
  pypy "$POLYUTILS/datechanger.py" yellowdata newyellowdata &&
    mv -f newyellowdata yellowdata
  awk -F ',' '{print $2","$4","$5","$6","$7","$16","$19","$20}' yellowdata > yellowdatastripped
  # The header gets the same column selection; column 20 is labelled
  # tip_percentage instead of being read from the header line. The trailing
  # CR is stripped with sub() rather than a multi-character RS, which POSIX
  # awk does not define, so LF-only headers are handled correctly as well.
  awk -F ',' '{ sub(/\r$/, ""); print $2","$4","$5","$6","$7","$16","$19",tip_percentage" }' yellowdata.header > yellowdatastripped.header
  mv -f yellowdatastripped yellowdata
  mv -f yellowdatastripped.header yellowdata.header
  hadoop fs -put yellowdata data/
  hadoop fs -put yellowdata.defaults data/
  hadoop fs -put yellowdata.header data/
)
	# NOTE(review): from here on the file repeats the script above line for line
	# (tab-indented). Running the file top to bottom executes every step twice —
	# confirm whether this second copy is intentional.
	#
	# Workspace setup: fetch the benchmark sources into ~/projects, install the
	# build/download tools, and build the yellowtaxi jar from its "hdfs" branch.
	# Side effects later steps rely on: the shell ends up in ~/projects/yellowtaxi
	# and POLYUTILS is exported, pointing at the datapolygamyutils checkout.
	# A failed cd aborts the script so nothing below runs in the wrong directory.
	cd ~/ || exit 1

	# Set up project
	# -p keeps a re-run from failing when the directory is already there.
	mkdir -p projects
	cd projects || exit 1
	# Skip a clone whose target directory already exists (git would refuse anyway).
	[ -d yellowtaxi ] || git clone https://github.com/aniquetahir/yellowtaxi.git
	[ -d datapolygamyutils ] || git clone https://github.com/aniquetahir/datapolygamyutils.git
	# Assign first, export second: a failing $(pwd) is not masked by export,
	# and the quoting keeps a path with spaces intact.
	POLYUTILS="$(pwd)/datapolygamyutils"
	export POLYUTILS
	# -y answers the confirmation prompt so an unattended run does not stall;
	# apt-get is the variant intended for scripts.
	sudo apt-get install -y maven axel pypy
	cd yellowtaxi || exit 1
	git checkout hdfs
	mvn install

	# Initialize hadoop
	# Stages the benchmark input, the assembly jar and heirarchy.ser in HDFS.
	# The jar and heirarchy.ser paths are relative, so this section expects the
	# current directory to be the built yellowtaxi checkout.
	#
	# -p creates missing parents (/user, /user/root) and does not fail when the
	# directory already exists, so a single call covers both levels.
	hadoop fs -mkdir -p /user/root/data
	# Download and unpack inside a subshell: the main script's working directory
	# is left untouched (no pushd/popd needed) and a failed cd skips the download
	# instead of scattering files in the wrong place.
	(
	  cd /tmp || exit 1
	  axel -a https://s3-us-west-2.amazonaws.com/anique/yellowdata_pickup.tar.gz
	  # The next command expects the archive to unpack to yellowdata_pickup.csv.
	  tar zxvf yellowdata_pickup.tar.gz
	  hadoop fs -put yellowdata_pickup.csv /user/root/data/
	)

	hadoop fs -mkdir -p /user/root/jars/
	hadoop fs -put target/yellowtaxi-1.0-SNAPSHOT-jar-with-dependencies.jar /user/root/jars/yellowtaxi.jar
	hadoop fs -put heirarchy.ser /user/root

	# Aggravation
	# Every job below is wrapped in `time` so its wall-clock duration is printed.
	# All runs use the same assembly jar and the same arguments
	# (yellowdata_pickup.csv 0); the jar path is relative to the yellowtaxi checkout.
	yellowtaxi_jar=target/yellowtaxi-1.0-SNAPSHOT-jar-with-dependencies.jar
	time spark-submit --class edu.asu.yellowtaxi.Aggravation "$yellowtaxi_jar" yellowdata_pickup.csv 0
	time spark-submit --class edu.asu.yellowtaxi.ZoneAggravation "$yellowtaxi_jar" yellowdata_pickup.csv 0

	# Intervention
	# "--master yarn-client" already implies client deploy mode, so combining it
	# with "--deploy-mode cluster" makes spark-submit reject the job before it
	# starts. Name the master and the deploy mode separately instead.
	# NOTE(review): cluster mode is kept because it was the explicit flag; confirm
	# that client mode was not the intended setting for this measurement.
	time spark-submit --master yarn --deploy-mode cluster --class edu.asu.yellowtaxi.Intervention "$yellowtaxi_jar" yellowdata_pickup.csv 0
	time spark-submit --class edu.asu.yellowtaxi.ZoneIntervention "$yellowtaxi_jar" yellowdata_pickup.csv 0

	time spark-submit --class edu.asu.yellowtaxi.HeirarchichalIntervention "$yellowtaxi_jar" yellowdata_pickup.csv 0

	# Data Polygamy baseline: clone the project, run its setup scripts, then
	# download the January 2016 yellow-taxi trips, reduce them to eight columns
	# and upload data, header and defaults files to the HDFS "data/" directory.
	# Requires POLYUTILS to point at the datapolygamyutils checkout.
	# A failed cd aborts the script so nothing below runs in the wrong directory.
	cd ~/projects || exit 1
	# Skip the clone when the target directory already exists (git would refuse anyway).
	[ -d data-polygamy ] || git clone https://github.com/ViDA-NYU/data-polygamy.git
	cd data-polygamy/sigmod16/ || exit 1
	./prepareSoftware.sh
	cd setup || exit 1
	# One tab-separated line: dataset name and 309.
	# NOTE(review): the meaning of 309 is not visible here — confirm against the
	# Data Polygamy documentation.
	printf 'yellowdata\t309\n' > ../../data/datasets.txt
	./hdfs_dir
	hdfs dfs -put ../../data/block.txt block
	hdfs dfs -put ../../data/block-graph.txt block-graph
	# Work in /tmp inside a subshell so the main script's working directory is
	# left untouched (no pushd/popd needed).
	(
	  cd /tmp || exit 1
	  axel -a https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-01.csv
	  # Data starts at line 3: the header and the line after it are dropped.
	  # NOTE(review): presumably line 2 is a blank separator — confirm.
	  tail -n +3 yellow_tripdata_2016-01.csv > yellowdata
	  head -n 1 yellow_tripdata_2016-01.csv > yellowdata.header
	  echo 'NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE' > yellowdata.defaults
	  # Replace the data file only when the conversion succeeded, so a failed run
	  # does not delete the input and leave nothing behind.
	  pypy "$POLYUTILS/datechanger.py" yellowdata newyellowdata &&
	    mv -f newyellowdata yellowdata
	  awk -F ',' '{print $2","$4","$5","$6","$7","$16","$19","$20}' yellowdata > yellowdatastripped
	  # The header gets the same column selection; column 20 is labelled
	  # tip_percentage instead of being read from the header line. awk reads the
	  # file directly: the previous "cat ... \| awk" had an escaped pipe, so the
	  # shell passed "|" and the awk words to cat as file names and awk never ran.
	  # The trailing CR is stripped with sub() rather than a multi-character RS,
	  # which POSIX awk does not define, so LF-only headers work as well.
	  awk -F ',' '{ sub(/\r$/, ""); print $2","$4","$5","$6","$7","$16","$19",tip_percentage" }' yellowdata.header > yellowdatastripped.header
	  mv -f yellowdatastripped yellowdata
	  mv -f yellowdatastripped.header yellowdata.header
	  hadoop fs -put yellowdata data/
	  hadoop fs -put yellowdata.defaults data/
	  hadoop fs -put yellowdata.header data/
	)