#!/bin/bash
# Script to start the job manager
# args: <work dir for context> <cluster address> [proxy_user]
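#
# Example invocation (hypothetical paths and cluster address, for illustration only):
#   <this-script> /var/run/sjs/ctx-1 akka.tcp://JobServer@10.0.0.5:2552 hdfs-user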
set -e
# Resolve the directory this script lives in into $appdir
get_abs_script_path() {
  pushd . >/dev/null
  cd "$(dirname "$0")"
  appdir=$(pwd)
  popd >/dev/null
}
get_abs_script_path
# Upload every jar in $1 to s3://$2/$3/ and echo a comma-prefixed list of
# their public HTTP URLs (callers append the result without adding a comma)
upload_other_jars() {
  local sourceDir=$1
  local workBucket=$2
  local workPrefix=$3
  local remoteFiles=""
  for jar in "$sourceDir"/*.jar; do
    local jarName
    jarName=$(basename "$jar")
    local awsS3Path="http://$workBucket.s3.amazonaws.com/$workPrefix/$jarName"
    aws s3 cp "$jar" "s3://$workBucket/$workPrefix/$jarName" > /dev/null 2>&1
    remoteFiles="${remoteFiles},${awsS3Path}"
  done
  echo "$remoteFiles"
}
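# Illustrative example (hypothetical bucket and prefix): with a.jar and b.jar
# in /sandbox, `upload_other_jars /sandbox my-bucket jobs/ctx-1` echoes
#   ,http://my-bucket.s3.amazonaws.com/jobs/ctx-1/a.jar,http://my-bucket.s3.amazonaws.com/jobs/ctx-1/b.jar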
. "$appdir/setenv.sh"
# Job server jar; override the default location with SJS_JAR_LOC
sjsJar=${SJS_JAR_LOC:-$appdir/spark-job-server.jar}
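# setenv.sh is assumed to define the variables used below: conffile, CLUSTER_MODE,
# SJS_WORK_BUCKET, SJS_WORK_PREFIX, SPARK_SUPERVISOR, SPARK_HOME, JOBSERVER_MEMORY,
# MAX_DIRECT_MEMORY and CONFIG_OVERRIDES, plus optionally ADD_SPARK_CONF and DOCKER_CREDS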
# Turn ADD_SPARK_CONF (comma- or space-separated key=value pairs) into
# individual --conf flags for spark-submit
addConf=""
if [ -n "$ADD_SPARK_CONF" ]; then
  IFS=', ' read -r -a sparkOpts <<< "$ADD_SPARK_CONF"
  for conf in "${sparkOpts[@]}"; do
    addConf="${addConf} --conf ${conf} "
  done
fi
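# For example (hypothetical values), ADD_SPARK_CONF="spark.mesos.coarse=true,spark.ui.port=4041"
# yields: --conf spark.mesos.coarse=true --conf spark.ui.port=4041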
# Override logging options to provide per-context logging
LOGGING_OPTS="-Dlog4j.configuration=file:$appdir/log4j-server.properties -DLOG_DIR=$1"
GC_OPTS="-XX:+UseConcMarkSweepGC \
-verbose:gc -XX:+PrintGCTimeStamps -Xloggc:$appdir/gc.out \
-XX:MaxPermSize=512m \
-XX:+CMSClassUnloadingEnabled "
JAVA_OPTS="-XX:MaxDirectMemorySize=$MAX_DIRECT_MEMORY \
-XX:+HeapDumpOnOutOfMemoryError -Djava.net.preferIPv4Stack=true"
MAIN="spark.jobserver.JobManager"
if [ "$CLUSTER_MODE" = true ]; then
workdir=`basename $1`
confName=`basename $conffile`
workConfPath="$SJS_WORK_PREFIX/$workdir/$confName"
contextPath="$SJS_WORK_PREFIX/$workdir/context.conf"
dockerCreds="$DOCKER_CREDS"
# upload our config files to s3
aws s3 cp $conffile s3://$SJS_WORK_BUCKET/$workConfPath
aws s3 cp $1/context.conf s3://$SJS_WORK_BUCKET/$contextPath
workUri="https://$SJS_WORK_BUCKET.s3.amazonaws.com/$workConfPath"
contextUri="https://$SJS_WORK_BUCKET.s3.amazonaws.com/$contextPath"
jarFiles=$(upload_other_jars $MESOS_SANDBOX $SJS_WORK_BUCKET "$SJS_WORK_PREFIX/$workdir")
echo "uploaded jars $jarFiles"
# no comma with jarFiles, as it adds it itself
mesosUris="${workUri},${contextUri},${dockerCreds}${jarFiles}"
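  # e.g. (hypothetical values) mesosUris might look like:
  #   https://my-bucket.s3.amazonaws.com/jobs/ctx-1/ctx.conf,https://my-bucket.s3.amazonaws.com/jobs/ctx-1/context.conf,<DOCKER_CREDS uri>,http://my-bucket.s3.amazonaws.com/jobs/ctx-1/a.jar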
  clusterOpts="--master $SPARK_SUPERVISOR --deploy-mode cluster --supervise --conf \"spark.mesos.uris=$mesosUris\""
  clusterAddr="$2"
  # Escape $MESOS_SANDBOX so it is expanded inside the driver's sandbox, not here
  runDir="\$MESOS_SANDBOX"
  sjsConf="\$MESOS_SANDBOX/$confName"
else
  clusterOpts=""
  clusterAddr="$2"
  runDir="$1"
  sjsConf="$conffile"
fi
# Optional third argument: submit jobs on behalf of this proxy user
if [ -n "$3" ]; then
  proxyUser="--proxy-user $3"
else
  proxyUser=""
fi
# Build the spark-submit invocation as a string so the nested single quotes
# around the java options survive, then eval it in the background
cmd="$SPARK_HOME/bin/spark-submit --class $MAIN --driver-memory $JOBSERVER_MEMORY \
  --conf 'spark.executor.extraJavaOptions=$LOGGING_OPTS' \
  $addConf \
  $clusterOpts \
  $proxyUser \
  --driver-java-options '$GC_OPTS $JAVA_OPTS $LOGGING_OPTS $CONFIG_OVERRIDES' \
  $sjsJar $runDir $clusterAddr $sjsConf"
echo "$cmd"
eval "$cmd" &