Skip to content

Instantly share code, notes, and snippets.

@samklr
Last active August 29, 2015 14:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save samklr/7f5f938dac84a830e866 to your computer and use it in GitHub Desktop.
Save samklr/7f5f938dac84a830e866 to your computer and use it in GitHub Desktop.
// sbt build definition for the so_analytics project.
name := "so_analytics"

version := "0.0.1-SNAPSHOT"

scalaVersion := "2.10.3"

scalacOptions ++= Seq("-deprecation", "-feature")

// All library dependencies, in their original order. The first two are
// test-scoped; the rest are on the compile classpath.
// NOTE(review): specs2 has no "test" scope here - confirm that is intended.
libraryDependencies ++= Seq(
  "org.scalatest"     %% "scalatest"       % "1.9.1" % "test",
  "junit"             %  "junit"           % "4.10"  % "test",
  "org.specs2"        %  "specs2_2.10"     % "2.2",
  "org.apache.spark"  %  "spark-core_2.10" % "0.9.1",
  // Don't forget to add hadoop-client, otherwise strange EOF exceptions may come up.
  "org.apache.hadoop" %  "hadoop-client"   % "2.2.0",
  "com.typesafe"      %  "config"          % "1.2.1"
)
#!/usr/bin/env bash
# This file contains environment variables required to run Spark. Copy it as
# spark-env.sh and edit that to configure Spark for your site.
#
# The following variables can be set in this file:
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - MESOS_NATIVE_LIBRARY, to point to your libmesos.so if you use Mesos
# - SPARK_JAVA_OPTS, to set node-specific JVM options for Spark. Note that
# we recommend setting app-wide options in the application's driver program.
# Examples of node-specific options : -Dspark.local.dir, GC options
# Examples of app-wide options : -Dspark.serializer
#
# If using the standalone deploy mode, you can also set variables for it here:
# - SPARK_MASTER_IP, to bind the master to a different IP address or hostname
# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports
# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
# - SPARK_WORKER_MEMORY, to set how much memory to use (e.g. 1000m, 2g)
# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT
# - SPARK_WORKER_INSTANCES, to set the number of worker processes per node
# - SPARK_WORKER_DIR, to set the working directory of worker processes
# Site settings for this node: assign each value first, then export them together.
SPARK_WORKER_MEMORY=20g
SPARK_DAEMON_MEMORY=3g
# NOTE(review): these options carry their own -Xms/-Xmx next to
# SPARK_DAEMON_MEMORY above - confirm which heap limit is meant to apply.
SPARK_DAEMON_JAVA_OPTS="-Xms512m -Xmx8192m"
# Use this machine's fully qualified hostname as the master address.
SPARK_MASTER_IP="$(hostname -f)"
export SPARK_WORKER_MEMORY SPARK_DAEMON_MEMORY SPARK_DAEMON_JAVA_OPTS SPARK_MASTER_IP
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Starts the master on the machine this script is executed on.
# Resolve the directory that holds this script: cd into dirname "$0" and ask pwd.
sbin="$(cd "$(dirname "$0")"; pwd)"
# Parse the command-line arguments.
#   --with-tachyon  request that Tachyon is started too; requires
#                   $sbin/../tachyon/bin/tachyon to exist, otherwise the
#                   script aborts with an error.
# Any other argument is skipped without complaint.
# Result: START_TACHYON is "true" when the flag was given, "false" otherwise.
START_TACHYON=false
while (( $# > 0 )); do
  case "$1" in
    --with-tachyon)
      if [[ ! -e "$sbin/../tachyon/bin/tachyon" ]]; then
        # Diagnostics go to stderr so they never mix with regular output.
        echo "Error: --with-tachyon specified, but tachyon not found." >&2
        # 255 is the status bash reported for the former `exit -1`; it is
        # spelled out because a negative status is outside the 0-255 range.
        exit 255
      fi
      START_TACHYON=true
      ;;
  esac
  shift
done
# Pull in spark-config.sh from the script directory, then the optional
# spark-env.sh from SPARK_CONF_DIR when that file exists.
# NOTE(review): SPARK_CONF_DIR is presumably defined by spark-config.sh - verify.
source "$sbin/spark-config.sh"
if [[ -f "${SPARK_CONF_DIR}/spark-env.sh" ]]; then
  source "${SPARK_CONF_DIR}/spark-env.sh"
fi
# Give every master setting that is unset or empty its default value.
: "${SPARK_MASTER_PORT:=7077}"
# hostname -f is only evaluated when no master address was provided.
: "${SPARK_MASTER_IP:=$(hostname -f)}"
: "${SPARK_MASTER_WEBUI_PORT:=8080}"
# Set SPARK_PUBLIC_DNS so the master reports the correct webUI address to the slaves.
# Only done when it is not already set and we appear to be running on EC2, in
# which case the instance's public hostname is used by default.
# NOTE: ec2-metadata is installed on Amazon Linux AMI, so the EC2 check is based
# on that command being present or on the hostname ending in ec2.internal.
if [[ -z "$SPARK_PUBLIC_DNS" ]] \
  && { command -v ec2-metadata > /dev/null || [[ "$(hostname)" == *ec2.internal ]]; }; then
  SPARK_PUBLIC_DNS="$(wget -q -O - http://instance-data.ec2.internal/latest/meta-data/public-hostname)"
  export SPARK_PUBLIC_DNS
fi
# Start the Spark master through spark-daemon.sh with the configured address
# and ports. Every expansion is quoted so each value reaches the daemon script
# as exactly one argument (no word splitting or glob expansion).
# NOTE(review): the literal 1 is presumably the instance number expected by
# spark-daemon.sh - verify against that script.
"$sbin"/spark-daemon.sh start org.apache.spark.deploy.master.Master 1 \
  --ip "$SPARK_MASTER_IP" \
  --port "$SPARK_MASTER_PORT" \
  --webui-port "$SPARK_MASTER_WEBUI_PORT"

# When --with-tachyon was requested, run Tachyon's bootstrap-conf for the
# master address, format it, and start the Tachyon master.
if [[ "$START_TACHYON" == "true" ]]; then
  "$sbin"/../tachyon/bin/tachyon bootstrap-conf "$SPARK_MASTER_IP"
  "$sbin"/../tachyon/bin/tachyon format -s
  "$sbin"/../tachyon/bin/tachyon-start.sh master
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment