SageMaker Lifecycle for Python 3.9 + PySpark 3.0.1
#!/bin/bash
# On-create lifecycle script: installs a custom Python 3.9 conda environment
# (registered as a Jupyter kernel at start-up) plus a pre-built Spark 3.0.1
# distribution on the notebook instance's persistent SageMaker volume.
set -e
sudo -u ec2-user -i <<'EOF'
mkdir -p /home/ec2-user/SageMaker/kernels
conda update -n base conda --yes
conda create --prefix /home/ec2-user/SageMaker/kernels/python39 -c conda-forge \
    python=3.9.1 \
    black=20.8b1 \
    boto3=1.16.63 \
    botocore=1.19.63 \
    findspark=1.3.0 \
    ipykernel=5.4.3 \
    jupytext=1.9.1 \
    matplotlib=3.3.4 \
    nbconvert=6.0.7 \
    notebook=6.1.6 \
    numpydoc=1.1.0 \
    pandas=1.2.1 \
    pyarrow=3.0.0 \
    pyspark=3.0.1 \
    python-language-server=0.36.2 \
    s3fs=0.5.2 \
    s3transfer=0.3.4 \
    sagemaker-python-sdk=2.24.3 \
    seaborn=0.11.1 \
    sphinx=3.4.3 \
    spyder=4.2.0 \
    tsfresh=0.17 \
    --yes
# Download the pre-built Spark 3.0.1 distribution
wget "https://downloads.apache.org/spark/spark-3.0.1/spark-3.0.1-bin-hadoop3.2.tgz" -O /tmp/spark-3.0.1-bin-hadoop3.2.tgz
# Unpack Spark onto the persistent volume
echo "Creating folder /home/ec2-user/SageMaker/spark..."
mkdir -p /home/ec2-user/SageMaker/spark
echo "Untarring Spark bundle..."
tar -xvf /tmp/spark-3.0.1-bin-hadoop3.2.tgz -C /home/ec2-user/SageMaker/spark --strip-components=1
echo "Done"
EOF
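
The two scripts in this gist are meant to be attached to a notebook instance as a lifecycle configuration: the script above as the on-create hook and the script below as the on-start hook. A minimal sketch for registering them with the AWS CLI, assuming they are saved locally as on_create.sh and on_start.sh; the configuration name is only an example, and base64 -w0 assumes GNU coreutils:

# Sketch: register both scripts as a notebook-instance lifecycle configuration.
aws sagemaker create-notebook-instance-lifecycle-config \
    --notebook-instance-lifecycle-config-name python39-pyspark301 \
    --on-create Content="$(base64 -w0 on_create.sh)" \
    --on-start Content="$(base64 -w0 on_start.sh)"

The configuration can then be referenced by name when creating the notebook instance, for example via --lifecycle-config-name on aws sagemaker create-notebook-instance.
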
#!/bin/bash
# On-start lifecycle script: registers the Python kernels installed by the
# on-create script with conda and Jupyter, and sets the Spark environment variables.
set -e
sudo -u ec2-user -i <<'EOF'
# Executed at each start-up so the persisted conda envs reappear in conda and in Jupyter
if [ -d "/home/ec2-user/SageMaker/kernels" ]; then
    for env in /home/ec2-user/SageMaker/kernels/*; do
        # -sfn keeps the symlink idempotent across restarts
        ln -sfn "$env" /home/ec2-user/anaconda3/envs/"$(basename "$env")"
        source /home/ec2-user/anaconda3/bin/activate "$(basename "$env")"
        python -m ipykernel install --user --name "$(basename "$env")" --display-name "$(basename "$env")"
        conda deactivate
    done
fi
EOF
# Set Spark environment variables for Jupyter
# (quoted heredoc so $SPARK_HOME and $PATH are written literally, not expanded here)
sudo tee /etc/profile.d/jupyter-env.sh > /dev/null <<'EOL'
export SPARK_HOME=/home/ec2-user/SageMaker/spark
export PATH=$SPARK_HOME/bin:$PATH
export PYSPARK_DRIVER_PYTHON=jupyter
export PYSPARK_DRIVER_PYTHON_OPTS='notebook'
EOL
sudo initctl restart jupyter-server --no-wait
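
After a restart, the python39 kernel should appear in the Jupyter launcher and Spark should be on the PATH. A few sanity checks that can be run from a terminal on the notebook instance, assuming the default paths used above:

# Sanity checks (paths assume the defaults from the scripts above).
jupyter kernelspec list                         # the python39 kernel should be listed
source /etc/profile.d/jupyter-env.sh
echo "$SPARK_HOME"                              # /home/ec2-user/SageMaker/spark
"$SPARK_HOME"/bin/spark-submit --version        # should report Spark 3.0.1
source /home/ec2-user/anaconda3/bin/activate python39
python -c "import findspark; findspark.init(); import pyspark; print(pyspark.__version__)"

Note that because PYSPARK_DRIVER_PYTHON is set to jupyter, running pyspark from a terminal launches a notebook server rather than the interactive PySpark shell.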