Install Python 3, Scala and Apache Spark via Homebrew (http://brew.sh/)
# Refresh Homebrew's formula index, then install each required formula in turn.
brew update
for formula in python3 scala apache-spark; do
  brew install "$formula"
done
Set environment variables (run `source ~/.bashrc` afterwards so they take effect in the current shell)
# Append the Spark environment to ~/.bashrc.
# The PYTHONPATH lines are single-quoted on purpose: $SPARK_HOME and $PYTHONPATH
# must be written literally and expanded when ~/.bashrc is sourced. With double
# quotes they would be expanded right now, while SPARK_HOME is still unset in
# the current shell, producing "export PYTHONPATH=/python/:...".
echo "export SPARK_HOME='/usr/local/Cellar/apache-spark/1.6.0/libexec/'" >> ~/.bashrc
# shellcheck disable=SC2016 — literal $VARS are intended here
echo 'export PYTHONPATH="$SPARK_HOME/python/:$PYTHONPATH"' >> ~/.bashrc
# shellcheck disable=SC2016 — literal $VARS are intended here
echo 'export PYTHONPATH="$SPARK_HOME/python/lib/py4j-0.9-src.zip:$PYTHONPATH"' >> ~/.bashrc
Install Jupyter (IPython Notebook) via pip3
pip3 install jupyter
Create an IPython profile
ipython profile create pyspark
Create a startup script for that profile (paste the Python code that follows the nano command into the file and save it)
nano ~/.ipython/profile_pyspark/startup/00-spark-setup.py
# IPython startup script for the "pyspark" profile: puts Spark's Python sources
# and the bundled py4j archive on sys.path, then runs pyspark/shell.py in this
# namespace.
import os
import sys

spark_home = os.environ.get('SPARK_HOME')
if not spark_home:
    # Fail with a clear message instead of a TypeError from os.path.join(None, ...).
    raise EnvironmentError('SPARK_HOME is not set; export it before starting IPython')

sys.path.insert(0, os.path.join(spark_home, 'python'))
sys.path.insert(0, os.path.join(spark_home, 'python/lib/py4j-0.9-src.zip'))

# execfile() was removed in Python 3; read, compile and exec the file instead.
# Compiling with the real path keeps tracebacks pointing at shell.py.
shell_path = os.path.join(spark_home, 'python/pyspark/shell.py')
with open(shell_path) as shell_file:
    exec(compile(shell_file.read(), shell_path, 'exec'))
Verify that the profile works
ipython --profile=pyspark
Create a kernel spec for Jupyter (paste the JSON that follows the nano command into kernel.json and save it)
mkdir -p ~/.ipython/kernels/pyspark
nano ~/.ipython/kernels/pyspark/kernel.json
{
"display_name": "PySpark (Spark 1.6.0)",
"language": "python",
"argv": [
"/usr/local/bin/python3",
"-m",
"ipykernel",
"--profile=pyspark",
"-f",
"{connection_file}"
]
}
Verify that Jupyter works
jupyter notebook