Created
October 4, 2016 18:52
-
-
Save BenFradet/edcfba1e3cae860b9a0f0e2ca26554bb to your computer and use it in GitHub Desktop.
spark submit script for https://benfradet.github.io/blog/2015/12/16/Exploring-spark.ml-with-the-Titanic-Kaggle-competition
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Builds the Titanic application with Maven, runs it through spark-submit on
# the local machine, and leaves the result as a single file named ${OUTPUT}
# in the directory that contains this script.

# Stop at the first failing command (a failed build must not be followed by
# a submit, and a failed submit must not be followed by the file shuffling),
# and treat unset variables and failing pipeline stages as errors.
set -euo pipefail

# Locates the directory where the script is
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Name of our output file to submit to Kaggle
OUTPUT="classified.csv"

# Same name but it's a temp file we'll use later
TMP_FILE="${OUTPUT}2"

# Move next to the script BEFORE cleaning up: every path below is relative,
# so cleaning up first would delete files in the caller's working directory
# instead of the leftovers of a previous run.
cd "${DIR}"

# Clean up
rm -rf -- "${OUTPUT}" "${TMP_FILE}"

# Builds the application and creates a fat jar with all the dependencies
mvn clean package

# Submit the job. The explanations live up here rather than at the end of
# each line: a comment after a trailing backslash stops the backslash from
# continuing the line, which splits the command apart.
#   --class   class to launch
#   --master  local[2]: we'll use two cores on the local machine
#             (quoted so the shell never treats the brackets as a glob)
#   jar       fat jar to use
#   args      arguments to the Titanic's main: train file, test file and the
#             file we want as output
spark-submit \
  --class com.github.benfradet.Titanic \
  --master 'local[2]' \
  target/titanic-1.0-SNAPSHOT.jar \
  src/main/resources/train.csv src/main/resources/test.csv "${OUTPUT}"

# Move around the generated file: the job leaves ${OUTPUT} as a directory
# holding part-00000, so pull that part out, drop the directory and give the
# part the final name.
mv -- "${OUTPUT}/part-00000" "${TMP_FILE}"
rm -rf -- "${OUTPUT}"
mv -- "${TMP_FILE}" "${OUTPUT}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment