Skip to content

Instantly share code, notes, and snippets.

@npatta01
Last active March 30, 2018 21:57
Show Gist options
  • Save npatta01/f444066c0c54d2c3cfe7f49265c890ac to your computer and use it in GitHub Desktop.
Save npatta01/f444066c0c54d2c3cfe7f49265c890ac to your computer and use it in GitHub Desktop.
#!/bin/bash
# Section 1: download Miniconda into the current directory, install it to
# /opt/conda, and export its bin directory (and PYSPARK_PYTHON) through the
# system-wide profile/environment files. Needs write access to /opt, /etc and
# /usr/lib/spark/conf.
set -e
# Ensure we have conda installed.
PROJ_DIR=$PWD
cd "$PROJ_DIR"
MINICONDA_VERSION='4.2.12'
MINICONDA_VARIANT="3"
OS_TYPE="Linux-x86_64.sh"
MINICONDA_FULL_NAME="Miniconda$MINICONDA_VARIANT-$MINICONDA_VERSION-$OS_TYPE"
MINICONDA_SCRIPT_PATH="$PROJ_DIR/$MINICONDA_FULL_NAME"
CONDA_INSTALL_PATH="/opt/conda"
CONDA_BIN_PATH="$CONDA_INSTALL_PATH/bin"
# Put conda first on PATH for the remainder of this script.
export PATH="$CONDA_BIN_PATH:$PATH"
echo "Complete Miniconda version resolved to: $MINICONDA_FULL_NAME"
# -O pins the output file name. With -P, a pre-existing file makes wget save
# the new download as "<name>.1", and the stale copy would be installed below.
wget "https://repo.continuum.io/miniconda/$MINICONDA_FULL_NAME" -O "$MINICONDA_SCRIPT_PATH"
echo "Downloaded $MINICONDA_FULL_NAME!"
ls -al "$MINICONDA_SCRIPT_PATH"
chmod 755 "$MINICONDA_SCRIPT_PATH"
echo "Installing $MINICONDA_FULL_NAME to $CONDA_INSTALL_PATH..."
# Installer flags: -b batch mode (no prompts), -p install prefix,
# -f do not fail when the prefix already exists.
bash "$MINICONDA_SCRIPT_PATH" -b -p "$CONDA_INSTALL_PATH" -f
chmod 755 "$CONDA_INSTALL_PATH"
# Create a convenience symlink next to the installer. -n replaces an existing
# link instead of following it and creating /opt/conda/conda on a re-run.
ln -sfn "$CONDA_INSTALL_PATH" "$PROJ_DIR/miniconda"
chmod 755 "$PROJ_DIR/miniconda"
echo "Adding path definition to profiles..."
# The first line expands CONDA_BIN_PATH now; the second is single-quoted on
# purpose so that PATH is extended at login time, not with today's PATH.
echo "export CONDA_BIN_PATH=$CONDA_BIN_PATH" | tee -a /etc/profile.d/conda.sh #/etc/*bashrc /etc/profile
echo 'export PATH=$CONDA_BIN_PATH:$PATH' | tee -a /etc/profile.d/conda.sh #/etc/*bashrc /etc/profile
# Point PySpark at the conda interpreter for shells, services and Spark.
echo "export PYSPARK_PYTHON=$CONDA_BIN_PATH/python" | tee -a /etc/profile.d/conda.sh /etc/environment /usr/lib/spark/conf/spark-env.sh
#### CONDA DONE ###
# Section 2: install the Python data-science stack, JupyterLab and the
# notebook extensions into the conda environment.

# Answer "yes" to every conda prompt and stop conda from altering the prompt.
conda config --set always_yes true
conda config --set changeps1 false

# Pick up the CONDA_BIN_PATH / PATH exports written to the profile script.
. /etc/profile.d/conda.sh

# Core packages come from the default channel, the rest from conda-forge.
conda install pandas scikit-learn jupyter
conda install --channel conda-forge jupyterlab setuptools
conda install --channel conda-forge jupyter_contrib_nbextensions
jupyter serverextension enable --py jupyterlab --sys-prefix

# Fix Python's hash seed for login shells and for Spark executors
# (presumably so hashing is consistent across processes -- confirm).
printf '%s\n' 'export PYTHONHASHSEED=0' | tee -a /etc/profile.d/conda.sh #/etc/*bashrc /usr/lib/spark/conf/spark-env.sh
printf '%s\n' 'spark.executorEnv.PYTHONHASHSEED=0' >> /etc/spark/conf/spark-defaults.conf

# For storing notebooks on GCS. jgscm is pinned to keep this script hermetic;
# google-cloud-storage is installed unpinned.
pip install jgscm==0.1.7
pip install google-cloud-storage

printf '%s\n' 'Completed installing Jupyter!'
# Section 3: generate the Jupyter notebook config for the current user and
# append this deployment's settings to it.
JUPYTER_PORT=8123
# Quoted so the asterisk can never be treated as a glob pattern.
JUPYTER_IP='*'
JUPYTER_NOTEBOOK_DIR="/root/notebooks"
# The token is expected from the caller's environment; nothing in this script
# sets it. An empty token is still written (unchanged behaviour), but that
# turns off notebook authentication, so say so loudly instead of silently.
JUPYTER_AUTH_TOKEN="${JUPYTER_AUTH_TOKEN:-}"
if [[ -z "$JUPYTER_AUTH_TOKEN" ]]; then
  echo "WARNING: JUPYTER_AUTH_TOKEN is empty; the notebook server will run without token authentication." >&2
fi
# mkdir -p is a no-op when the directory already exists.
mkdir -p "$JUPYTER_NOTEBOOK_DIR"
echo "Creating Jupyter config..."
jupyter notebook --allow-root --generate-config -y --ip="${JUPYTER_IP}"
# Append all overrides in one write; later assignments in the config file win
# over the generated defaults.
cat >> ~/.jupyter/jupyter_notebook_config.py <<EOF
c.Application.log_level = 'DEBUG'
c.NotebookApp.ip = '${JUPYTER_IP}'
c.NotebookApp.open_browser = False
c.NotebookApp.port = $JUPYTER_PORT
c.NotebookApp.notebook_dir = '$JUPYTER_NOTEBOOK_DIR'
c.NotebookApp.token = u'$JUPYTER_AUTH_TOKEN'
EOF
# Section 4: build a "pyspark" Jupyter kernelspec that starts an IPython kernel
# with Spark's Python libraries on PYTHONPATH, register it, and make it the
# default kernel.
echo "Installing pyspark Kernel..."
# Informational only: the value is logged but not used further below.
SPARK_MAJOR_VERSION=$(spark-submit --version |& \
  grep 'version' | head -n 1 | sed 's/.*version //' | cut -d '.' -f 1)
echo "Determined SPARK_MAJOR_VERSION to be '${SPARK_MAJOR_VERSION}'" >&2
# Let the shell expand the glob instead of parsing ls output. If nothing
# matches, the pattern stays literal and the -e test fails, so we exit with
# an error code when no py4j archive is found.
py4j_candidates=(/usr/lib/spark/python/lib/py4j-*.zip)
if [[ ! -e "${py4j_candidates[0]}" ]]; then
  echo "No py4j zip found under /usr/lib/spark/python/lib" >&2
  exit 1
fi
# In case there are multiple py4j versions or unversioned symlinks to the
# versioned file, just choose the first one to use for jupyter.
PY4J_ZIP="${py4j_candidates[0]}"
echo "Found PY4J_ZIP: '${PY4J_ZIP}'" >&2
# Placeholder for extra spark-submit arguments (e.g. --packages); empty here.
PACKAGES_ARG=''
# Relative path: the kernelspec is staged under the current working directory.
JUPYTER_KERNEL_DIR="kernels/pyspark"
mkdir -p "${JUPYTER_KERNEL_DIR}"
# Unquoted delimiter on purpose: PY4J_ZIP and PACKAGES_ARG are expanded now.
cat >"${JUPYTER_KERNEL_DIR}/kernel.json" <<EOF
{
"argv": [
"python", "-m", "ipykernel", "-f", "{connection_file}"],
"display_name": "PySpark",
"language": "python",
"env": {
"SPARK_HOME": "/usr/lib/spark/",
"PYTHONPATH": "/usr/lib/spark/python/:${PY4J_ZIP}",
"PYTHONSTARTUP": "/usr/lib/spark/python/pyspark/shell.py",
"PYSPARK_SUBMIT_ARGS": "--master yarn --deploy-mode client ${PACKAGES_ARG} pyspark-shell"
}
}
EOF
jupyter kernelspec install "$JUPYTER_KERNEL_DIR"
echo "c.MappingKernelManager.default_kernel_name = 'pyspark'" >> ~/.jupyter/jupyter_notebook_config.py
echo "Jupyter setup!"
# Section 5: install a launcher script and a systemd unit for the notebook
# server, then enable and (re)start the service.
echo "Installing Jupyter service..."
# Create a separate runner file to make it easier to pull in the right
# environment variables, etc., before launching the notebook.
JUPYTER_LAUNCHER='/usr/local/bin/launch_jupyter.sh'
# Quoted delimiter: the launcher body is written literally, nothing expands.
cat << 'EOF' > "${JUPYTER_LAUNCHER}"
#!/bin/bash
source /etc/profile.d/conda.sh
/opt/conda/bin/jupyter notebook --allow-root --no-browser
EOF
chmod 750 "${JUPYTER_LAUNCHER}"
INIT_SCRIPT="/usr/lib/systemd/system/jupyter-notebook.service"
# Unquoted delimiter: JUPYTER_LAUNCHER is expanded into ExecStart. The service
# redirects the launcher's stdout and stderr to /var/log/jupyter_notebook.log.
cat << EOF > "${INIT_SCRIPT}"
[Unit]
Description=Jupyter Notebook Server
[Service]
Type=simple
ExecStart=/bin/bash -c 'exec ${JUPYTER_LAUNCHER} &> /var/log/jupyter_notebook.log'
[Install]
WantedBy=multi-user.target
EOF
# World-readable but writable by the owner only: a world-writable unit would
# let any local user change what this service executes.
chmod 644 "${INIT_SCRIPT}"
echo "Starting Jupyter notebook..."
systemctl daemon-reload
systemctl enable jupyter-notebook
systemctl restart jupyter-notebook
# With set -e in effect, a non-zero status here aborts the script.
systemctl status jupyter-notebook
echo "Jupyter installation succeeded" >&2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment