Last active
March 30, 2018 21:57
-
-
Save npatta01/f444066c0c54d2c3cfe7f49265c890ac to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Provisions a Spark node with Miniconda and a Jupyter notebook server.
# This first section downloads the Miniconda installer into the current
# directory, installs it under /opt/conda, and publishes the conda bin
# directory (and PYSPARK_PYTHON) through system-wide profile/config files.
# Must run as a user that can write to /opt, /etc and /usr/lib/spark/conf.
set -e

# Ensure we have conda installed.
PROJ_DIR=$PWD
cd "$PROJ_DIR"

MINICONDA_VERSION='4.2.12'
MINICONDA_VARIANT="3"
OS_TYPE="Linux-x86_64.sh"
MINICONDA_FULL_NAME="Miniconda$MINICONDA_VARIANT-$MINICONDA_VERSION-$OS_TYPE"
MINICONDA_SCRIPT_PATH="$PROJ_DIR/$MINICONDA_FULL_NAME"
CONDA_INSTALL_PATH="/opt/conda"
CONDA_BIN_PATH="$CONDA_INSTALL_PATH/bin"
# Put conda on PATH now so the rest of this script can call conda/pip/jupyter.
export PATH="$CONDA_BIN_PATH:$PATH"

echo "Complete Miniconda version resolved to: $MINICONDA_FULL_NAME"
# -O pins the output path: on a re-run the installer is overwritten instead
# of being saved as "<name>.1" while the stale first download gets executed.
wget "https://repo.continuum.io/miniconda/$MINICONDA_FULL_NAME" \
  -O "$MINICONDA_SCRIPT_PATH"
echo "Downloaded $MINICONDA_FULL_NAME!"
ls -al "$MINICONDA_SCRIPT_PATH"
chmod 755 "$MINICONDA_SCRIPT_PATH"

echo "Installing $MINICONDA_FULL_NAME to $CONDA_INSTALL_PATH..."
# -b: batch (non-interactive) mode, -p: install prefix,
# -f: do not fail if the prefix already exists.
bash "$MINICONDA_SCRIPT_PATH" -b -p "$CONDA_INSTALL_PATH" -f
chmod 755 "$CONDA_INSTALL_PATH"

# Create a convenience symlink next to the installer. -n replaces an existing
# link rather than following it and creating a nested link inside /opt/conda.
ln -sfn "$CONDA_INSTALL_PATH" "$PROJ_DIR/miniconda"
chmod 755 "$PROJ_DIR/miniconda"

echo "Adding path definition to profiles..."
# The first line expands CONDA_BIN_PATH now; the second is single-quoted on
# purpose so that $CONDA_BIN_PATH and $PATH are resolved at login time.
echo "export CONDA_BIN_PATH=$CONDA_BIN_PATH" | tee -a /etc/profile.d/conda.sh
echo 'export PATH=$CONDA_BIN_PATH:$PATH' | tee -a /etc/profile.d/conda.sh
# Point PySpark at the conda interpreter for login shells, the system
# environment file and Spark's own environment script.
echo "export PYSPARK_PYTHON=$CONDA_BIN_PATH/python" \
  | tee -a /etc/profile.d/conda.sh /etc/environment /usr/lib/spark/conf/spark-env.sh
#### CONDA DONE ###
# Install the Python data stack, Jupyter and JupyterLab into the conda
# installation, then record the settings PySpark needs.

# Answer "yes" to every conda prompt and keep conda from rewriting PS1.
conda config --set always_yes true --set changeps1 false
# Load the profile snippet written earlier so this shell sees the same
# environment a login shell would.
source /etc/profile.d/conda.sh

conda install pandas scikit-learn jupyter
conda install -c conda-forge jupyterlab setuptools
conda install -c conda-forge jupyter_contrib_nbextensions
jupyter serverextension enable --py jupyterlab --sys-prefix

# Fix the Python hash seed for login shells and for Spark executors so that
# string hashing is consistent across processes.
echo "export PYTHONHASHSEED=0" | tee -a /etc/profile.d/conda.sh
echo "spark.executorEnv.PYTHONHASHSEED=0" >> /etc/spark/conf/spark-defaults.conf

# For storing notebooks on GCS. Pin version to make this script hermetic.
pip install jgscm==0.1.7
# NOTE(review): google-cloud-storage is not pinned, unlike jgscm above.
pip install google-cloud-storage
echo "Completed installing Jupyter!"
# Generate the Jupyter notebook configuration for the current user.
JUPYTER_PORT=8123
# Quoted so the asterisk is always passed literally and never treated as a
# filename pattern.
JUPYTER_IP='*'
JUPYTER_NOTEBOOK_DIR="/root/notebooks"
# mkdir -p is a no-op when the directory already exists.
mkdir -p "$JUPYTER_NOTEBOOK_DIR"

echo "Creating Jupyter config..."
# -y overwrites any existing config file, so the settings appended below are
# not duplicated when this script is run again.
jupyter notebook --allow-root --generate-config -y --ip="${JUPYTER_IP}"

# JUPYTER_AUTH_TOKEN is not set anywhere in this script; it is taken from the
# caller's environment and defaults to empty.
# NOTE(review): an empty token means the notebook server accepts
# unauthenticated connections on all interfaces - confirm this is intended.
cat >> ~/.jupyter/jupyter_notebook_config.py <<EOF
c.Application.log_level = 'DEBUG'
c.NotebookApp.ip = '${JUPYTER_IP}'
c.NotebookApp.open_browser = False
c.NotebookApp.port = $JUPYTER_PORT
c.NotebookApp.notebook_dir = '$JUPYTER_NOTEBOOK_DIR'
c.NotebookApp.token = u'${JUPYTER_AUTH_TOKEN:-}'
EOF
# Build a "PySpark" Jupyter kernel spec that starts a Python kernel with the
# Spark Python libraries on PYTHONPATH, and make it the default kernel.
echo "Installing pyspark Kernel..."
# Extract the major version from the first "version" line that spark-submit
# prints (|& also captures stderr). Only reported here for diagnostics.
SPARK_MAJOR_VERSION=$(spark-submit --version |& \
  grep 'version' | head -n 1 | sed 's/.*version //' | cut -d '.' -f 1)
echo "Determined SPARK_MAJOR_VERSION to be '${SPARK_MAJOR_VERSION}'" >&2

# Locate the py4j archive shipped with Spark. Globbing into an array avoids
# parsing ls output; an unmatched pattern stays literal, so the -e test
# detects "not found" and lets us exit with an error code.
py4j_candidates=(/usr/lib/spark/python/lib/py4j-*.zip)
if [[ ! -e "${py4j_candidates[0]}" ]]; then
  echo "No py4j zip found in /usr/lib/spark/python/lib" >&2
  exit 1
fi
# In case there are multiple py4j versions or unversioned symlinks to the
# versioned file, just choose the first one to use for jupyter.
PY4J_ZIP="${py4j_candidates[0]}"
echo "Found PY4J_ZIP: '${PY4J_ZIP}'" >&2

# Extra arguments for spark-submit (e.g. --packages); empty by default.
PACKAGES_ARG=''
# Relative path: the kernel spec is staged under the current directory and
# then copied into place by "jupyter kernelspec install".
JUPYTER_KERNEL_DIR="kernels/pyspark"
mkdir -p "${JUPYTER_KERNEL_DIR}"
# Unquoted delimiter: PY4J_ZIP and PACKAGES_ARG are expanded while writing;
# {connection_file} is left as-is for Jupyter to fill in.
cat > "${JUPYTER_KERNEL_DIR}/kernel.json" <<EOF
{
  "argv": [
    "python", "-m", "ipykernel", "-f", "{connection_file}"],
  "display_name": "PySpark",
  "language": "python",
  "env": {
    "SPARK_HOME": "/usr/lib/spark/",
    "PYTHONPATH": "/usr/lib/spark/python/:${PY4J_ZIP}",
    "PYTHONSTARTUP": "/usr/lib/spark/python/pyspark/shell.py",
    "PYSPARK_SUBMIT_ARGS": "--master yarn --deploy-mode client ${PACKAGES_ARG} pyspark-shell"
  }
}
EOF
jupyter kernelspec install "$JUPYTER_KERNEL_DIR"
echo "c.MappingKernelManager.default_kernel_name = 'pyspark'" >> ~/.jupyter/jupyter_notebook_config.py
echo "Jupyter setup!"
# Register the notebook server as a systemd service and start it.
echo "Installing Jupyter service..."
# Create a separate runner file to make it easier to pull in the right
# environment variables, etc,. before launching the notebook.
JUPYTER_LAUNCHER='/usr/local/bin/launch_jupyter.sh'
# Quoted delimiter: the launcher is written verbatim, with no expansion.
cat > "${JUPYTER_LAUNCHER}" <<'EOF'
#!/bin/bash
source /etc/profile.d/conda.sh
/opt/conda/bin/jupyter notebook --allow-root --no-browser
EOF
chmod 750 "${JUPYTER_LAUNCHER}"

INIT_SCRIPT="/usr/lib/systemd/system/jupyter-notebook.service"
# Unquoted delimiter: JUPYTER_LAUNCHER is expanded into the unit file. The
# launcher is wrapped in bash -c so its output can be redirected to a log.
cat > "${INIT_SCRIPT}" <<EOF
[Unit]
Description=Jupyter Notebook Server

[Service]
Type=simple
ExecStart=/bin/bash -c 'exec ${JUPYTER_LAUNCHER} &> /var/log/jupyter_notebook.log'

[Install]
WantedBy=multi-user.target
EOF
# Unit files must not be world-writable: anyone able to edit this file could
# change what the service executes. Owner-writable, world-readable is enough.
chmod 644 "${INIT_SCRIPT}"

echo "Starting Jupyter notebook..."
systemctl daemon-reload
systemctl enable jupyter-notebook
systemctl restart jupyter-notebook
# Under "set -e" a non-zero status here (service not running) aborts the
# script before the success message is printed.
systemctl --no-pager status jupyter-notebook
echo "Jupyter installation succeeded" >&2
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment