Skip to content

Instantly share code, notes, and snippets.

@zeppelinen
Last active February 3, 2019 22:25
Show Gist options
  • Save zeppelinen/c2dd13fda931733e27b87020304eb1b9 to your computer and use it in GitHub Desktop.
Save zeppelinen/c2dd13fda931733e27b87020304eb1b9 to your computer and use it in GitHub Desktop.
Setup kubeflow on AWS. Copy-pastable
# macOS-specific prerequisites.
# Install every CLI dependency in one brew invocation; the third-party
# taps (weaveworks for eksctl, ksonnet for ks) are resolved inline.
brew install awscli kubectl weaveworks/tap/eksctl ksonnet/tap/ks docker helm
# configure connection to docker daemon
# export DOCKER_HOST=ssh://user@docker-machine.com
# Export AWS credentials for this shell session — alternatively put them
# into ~/.aws/credentials and skip the two key exports below.
export AWS_DEFAULT_REGION=us-east-1
export AWS_ACCESS_KEY_ID=KEY_ID
export AWS_SECRET_ACCESS_KEY=KEY
# Create the EKS Kubernetes cluster (long-form flags for readability):
#   --nodes           number of worker nodes
#   --region          AWS region
#   --name            cluster name
#   --node-type       EC2 instance type
#   --ssh-public-key  optional public ssh key to install on the cluster nodes
eksctl create cluster --nodes 3 --region us-east-1 --name kubeflow-1 \
  --node-type m5.large --ssh-public-key ~/.ssh/id_rsa_4096.pub --timeout=90m
# verify the worker nodes came up
kubectl describe nodes
# persistent volumes for Jupyter: check whether a storage class already exists
kubectl get storageclass
# If no default storage class exists, create one backed by gp2 EBS volumes.
# NOTE(fix): the original heredoc placed the keys belonging to `metadata:`
# at column 0, which is invalid YAML (kubectl rejects the manifest).
# Indentation is corrected here.
cat <<EOF | kubectl create -f -
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
  name: gp2
  annotations:
    storageclass.beta.kubernetes.io/is-default-class: "true"
provisioner: kubernetes.io/aws-ebs
parameters:
  type: gp2
reclaimPolicy: Delete
mountOptions:
  - debug
EOF
# confirm the storage class was created
kubectl get storageclass
# kubeflow-specific installation steps
export NAMESPACE=kubeflow
kubectl create namespace "${NAMESPACE}"
# Download and run the kubeflow deploy script. KUBEFLOW_DEPLOY=false makes it
# only generate the ksonnet app; we apply it explicitly below.
export KUBEFLOW_VERSION=0.2.5
export KUBEFLOW_DEPLOY=false
# -fsSL: fail on HTTP errors instead of piping an error page into bash
curl -fsSL https://raw.githubusercontent.com/kubeflow/kubeflow/v${KUBEFLOW_VERSION}/scripts/deploy.sh | bash
# launch installation (the deploy script created kubeflow_ks_app/)
cd kubeflow_ks_app/ || exit 1
ks env set default --namespace "${NAMESPACE}"
ks apply default
# check kubeflow deployment status
kubectl get pod -n "${NAMESPACE}"
# at this point the installation should be finished
### Jupyter specific steps
# Resolve the AWS account id. `sts get-caller-identity` returns it directly
# and works for IAM users, assumed roles and root credentials alike, unlike
# grepping the Arn out of `aws iam get-user` (which fails for role creds).
export ACCOUNTID=$(aws sts get-caller-identity --query Account --output text)
# log docker in to ECR and create the notebook image repository
aws ecr get-login --no-include-email --region us-east-1 > docker_login.sh
bash docker_login.sh
aws ecr create-repository --repository-name tensorflow-notebook-cpu --region us-east-1
# fetch the model training code (-fsSL: fail loudly on HTTP errors)
curl -fsSL -o train.py https://raw.githubusercontent.com/kubeflow/examples/master/github_issue_summarization/notebooks/train.py
curl -fsSL -o seq2seq_utils.py https://raw.githubusercontent.com/kubeflow/examples/master/github_issue_summarization/notebooks/seq2seq_utils.py
# Build, tag and push the Jupyter notebook docker image to the registry.
docker build -t "$ACCOUNTID.dkr.ecr.us-east-1.amazonaws.com/tensorflow-notebook-cpu:latest" . -f-<<EOF
FROM gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-cpu
# -y is required: docker build has no tty, so pip's interactive
# "Proceed (y/n)?" confirmation would abort the build
RUN pip uninstall -y msgpack thinc
RUN pip install thinc
RUN pip install ktext annoy sklearn h5py nltk pydot matplotlib
COPY train.py /workdir/train.py
COPY seq2seq_utils.py /workdir/seq2seq_utils.py
EOF
docker push "$ACCOUNTID.dkr.ecr.us-east-1.amazonaws.com/tensorflow-notebook-cpu:latest"
# Connect to the Jupyter Notebook hub (tf-hub-lb service) via port-forward
kubectl port-forward svc/tf-hub-lb -n ${NAMESPACE} 8080:80
# open http://localhost:8080 in a browser (the forwarded port above)
# configure spawner options
# Image: image we created previously - AWS_ID.dkr.ecr.us-east-1.amazonaws.com/tensorflow-notebook-cpu:latest
# CPU, Memory - up to you
# Then click "spawn"
# Check that we have jupyter-"something" pods running
kubectl get pod -n ${NAMESPACE} | grep jupyter
# launch jupyter web terminal and clone example kubeflow repo:
# git clone https://github.com/kubeflow/examples
#
# in jupyter console browse to the Training ipynb, adapt training parameters and launch it
# set DATA_DIR to /home/jovyan/github-issues-data
# set training_data_size to the number of rows you want to process
# to launch, select Cell -> Run All
### Serve the trained model
# Build a microservice around the trained model and the
# IssueSummarization.py script; Dockerfiles and deps come from github.
git clone https://github.com/kubeflow/examples serve/
# run the seldon python wrapper (generates a build/ directory)
cd serve/github_issue_summarization/notebooks
docker run -v "$(pwd):/my_model" seldonio/core-python-wrapper:0.7 /my_model IssueSummarization 0.1 gcr.io --base-image=python:3.6 --image-name=gcr-repository-name/issue-summarization
# copy the trained model artifacts out of the notebook pod on the cluster
cd build
sudo chown "$(id -u)" .
PODNAME=$(kubectl get pods --namespace="${NAMESPACE}" --selector="app=jupyterhub" --output=template --template="{{with index .items 0}}{{.metadata.name}}{{end}}")
for artifact in seq2seq_model_tutorial.h5 body_pp.dpkl title_pp.dpkl; do
  kubectl --namespace="${NAMESPACE}" cp "${PODNAME}:/home/jovyan/examples/github_issue_summarization/notebooks/${artifact}" .
done
# There's an issue with ip settings of jupyter notebook scripts in some environments.
# See https://github.com/codenvy/codenvy/issues/2427 for details.
# Take start-notebook.sh from the kubeflow repo and put it in the build directory.
# (path fixed: the repo directory is tensorflow-notebook-image, not tsorflow-...)
cp PATH_TO/kubeflow_repo/components/tensorflow-notebook-image/start-notebook.sh ./
# then edit this script and add the argument --ip=0.0.0.0 to the jupyter command.
# To ship the script inside the serve-model image, open the Dockerfile and add:
#   COPY start-notebook.sh /usr/local/bin/start-notebook.sh
# just before the `WORKDIR /microservice` line.
# (NOTE(fix): those two lines were bare, uncommented text in the original and
# would have been executed — and failed — when running this file as a script.)
# also copy again the scripts and data from the notebooks directory (the one inside build dir)
cp notebooks/IssueSummarization.py ./
cp notebooks/Training.ipynb ./
# then correct the seldon requirements to make them compatible with the current pandas version:
# edit seldon_requirements.txt and change the numpy line to just `numpy`, without any version pin
# build and push the service image to our registry
aws ecr create-repository --repository-name github-issue-summarization --region us-east-1
docker build --force-rm=true -t "$ACCOUNTID.dkr.ecr.us-east-1.amazonaws.com/github-issue-summarization:latest" .
docker push "$ACCOUNTID.dkr.ecr.us-east-1.amazonaws.com/github-issue-summarization:latest"
# serve the model
# go back to the kubeflow_ks_app directory first
ks generate seldon seldon --name=seldon
ks apply default -c seldon
# ensure the seldon cluster manager is up and running
kubectl get pods -n "${NAMESPACE}" | grep seldon-cluster-manager
# Deploy the trained image as a seldon deployment.
# NOTE(fix): the image was pushed with tag :latest above, so reference
# :latest here — the original referenced :0.1, a tag that was never pushed,
# which would leave the pods in ImagePullBackOff.
ks generate seldon-serve-simple issue-summarization-model-serving \
  --name=issue-summarization \
  --image=$ACCOUNTID.dkr.ecr.us-east-1.amazonaws.com/github-issue-summarization:latest \
  --replicas=3
ks apply default -c issue-summarization-model-serving
# check that we can access the model API (ambassador gateway on localhost:8081)
kubectl port-forward svc/ambassador -n "${NAMESPACE}" 8081:80
# try to predict a summary for an issue body
curl -X POST -H 'Content-Type: application/json' -d '{"data":{"ndarray":[[".pyenv/versions/2.7.13/envs/ENV2/lib/python2.7/site-packages/keystoneauth1/adapter.py:136: UserWarning: Using keystoneclient sessions has been deprecated. Please update your software to use keystoneauth1. warnings.warn(Using keystoneclient sessions has been deprecated. Determining IP Address to use with a ping test. Checking ... IP to be used is: INFO: Connecting to Instance at IP: Warning: Identity file gvonlasz not accessible: No such file or directory."]]}}' http://localhost:8081/seldon/issue-summarization/api/v0.1/predictions
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment