Last active
February 3, 2019 22:25
-
-
Save zeppelinen/c2dd13fda931733e27b87020304eb1b9 to your computer and use it in GitHub Desktop.
Setup kubeflow on AWS. Copy-pastable
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# macOS specific: install dependencies via Homebrew
brew install awscli
brew install kubectl
brew install weaveworks/tap/eksctl
brew install ksonnet/tap/ks
brew install docker
brew install helm

# Configure connection to a remote docker daemon if needed:
# export DOCKER_HOST=ssh://user@docker-machine.com

# Export AWS credentials as environment variables, or configure
# ~/.aws/credentials instead. Replace KEY_ID/KEY with real values;
# never commit real credentials.
export AWS_DEFAULT_REGION=us-east-1
export AWS_ACCESS_KEY_ID=KEY_ID
export AWS_SECRET_ACCESS_KEY=KEY
# Create the Kubernetes (EKS) cluster:
#   -N                number of worker nodes
#   -n                name of the cluster
#   -r                AWS region
#   -t                EC2 instance type
#   --ssh-public-key  optional path to a public ssh key to install on cluster nodes
eksctl create cluster -N 3 -r us-east-1 -n kubeflow-1 -t m5.large \
  --ssh-public-key ~/.ssh/id_rsa_4096.pub --timeout=90m

# Check status of the cluster nodes
kubectl describe nodes
# Configure persistent volumes for Jupyter.
# Check whether a storage class already exists:
kubectl get storageclass

# If it doesn't exist, create a default gp2 (AWS EBS) storage class:
cat <<EOF | kubectl create -f -
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
  name: gp2
  annotations:
    storageclass.beta.kubernetes.io/is-default-class: "true"
provisioner: kubernetes.io/aws-ebs
parameters:
  type: gp2
reclaimPolicy: Delete
mountOptions:
  - debug
EOF

# Verify that the storage class was created
kubectl get storageclass
# Kubeflow-specific installation steps
export NAMESPACE=kubeflow
kubectl create namespace "${NAMESPACE}"

# Download the kubeflow deploy script.
# KUBEFLOW_DEPLOY=false: generate the ksonnet app without deploying yet.
export KUBEFLOW_VERSION=0.2.5
export KUBEFLOW_DEPLOY=false
curl "https://raw.githubusercontent.com/kubeflow/kubeflow/v${KUBEFLOW_VERSION}/scripts/deploy.sh" | bash

# Launch the installation
cd kubeflow_ks_app/
ks env set default --namespace "${NAMESPACE}"
ks apply default

# Check kubeflow deployment status.
# The installation is finished once all pods are Running.
kubectl get pod -n "${NAMESPACE}"
### Jupyter-specific steps
# Create a docker (ECR) repository.
# Use STS to obtain the account id directly instead of scraping the
# 'aws iam get-user' output with grep/cut, which is brittle.
export ACCOUNTID=$(aws sts get-caller-identity --query Account --output text)
aws ecr get-login --no-include-email --region us-east-1 > docker_login.sh
bash docker_login.sh
aws ecr create-repository --repository-name tensorflow-notebook-cpu --region us-east-1

# Fetch the model code
curl -o train.py https://raw.githubusercontent.com/kubeflow/examples/master/github_issue_summarization/notebooks/train.py
curl -o seq2seq_utils.py https://raw.githubusercontent.com/kubeflow/examples/master/github_issue_summarization/notebooks/seq2seq_utils.py

# Build, tag and push the Jupyter notebook docker image to the registry.
# NOTE: 'pip uninstall' requires -y, otherwise the non-interactive docker
# build hangs waiting for a confirmation prompt.
docker build -t $ACCOUNTID.dkr.ecr.us-east-1.amazonaws.com/tensorflow-notebook-cpu:latest . -f-<<EOF
FROM gcr.io/kubeflow-images-public/tensorflow-1.9.0-notebook-cpu
RUN pip uninstall -y msgpack thinc
RUN pip install thinc
RUN pip install ktext annoy sklearn h5py nltk pydot matplotlib
COPY train.py /workdir/train.py
COPY seq2seq_utils.py /workdir/seq2seq_utils.py
EOF
docker push $ACCOUNTID.dkr.ecr.us-east-1.amazonaws.com/tensorflow-notebook-cpu:latest
# Connect to the Jupyter Notebook hub
kubectl port-forward svc/tf-hub-lb -n "${NAMESPACE}" 8080:80
# Open http://localhost:8080 (the port forwarded above), then configure
# spawner options:
#   Image: the image we created previously -
#     AWS_ID.dkr.ecr.us-east-1.amazonaws.com/tensorflow-notebook-cpu:latest
#   CPU, Memory: up to you
# Then click "Spawn".

# Check that we have jupyter-<something> pods running
kubectl get pod -n "${NAMESPACE}" | grep jupyter

# Launch a jupyter web terminal and clone the example kubeflow repo:
#   git clone https://github.com/kubeflow/examples
#
# In the jupyter console browse to the Training ipynb, adapt the training
# parameters and launch it:
#   - set DATA_DIR to /home/jovyan/github-issues-data
#   - set training_data_size to the number of rows you want to process
#   - to launch, select Cell -> Run All
### Serve the trained model
# Create a microservice with the trained model and the IssueSummarization.py
# script. Copy Dockerfiles and deps from github:
git clone https://github.com/kubeflow/examples serve/

# Run the seldon wrapper to generate the serving build directory
cd serve/github_issue_summarization/notebooks
docker run -v "$(pwd)":/my_model seldonio/core-python-wrapper:0.7 /my_model IssueSummarization 0.1 gcr.io --base-image=python:3.6 --image-name=gcr-repository-name/issue-summarization

# Copy the trained model artifacts out of the jupyterhub pod
cd build
sudo chown "$(id -u)" .
PODNAME=$(kubectl get pods --namespace="${NAMESPACE}" --selector="app=jupyterhub" --output=template --template="{{with index .items 0}}{{.metadata.name}}{{end}}")
kubectl --namespace="${NAMESPACE}" cp "${PODNAME}":/home/jovyan/examples/github_issue_summarization/notebooks/seq2seq_model_tutorial.h5 .
kubectl --namespace="${NAMESPACE}" cp "${PODNAME}":/home/jovyan/examples/github_issue_summarization/notebooks/body_pp.dpkl .
kubectl --namespace="${NAMESPACE}" cp "${PODNAME}":/home/jovyan/examples/github_issue_summarization/notebooks/title_pp.dpkl .

# There's an issue with the ip settings of jupyter notebook scripts in some
# environments. See https://github.com/codenvy/codenvy/issues/2427 for details.
# Take start-notebook.sh from the kubeflow repo and put it into the build
# directory:
cp PATH_TO/kubeflow_repo/components/tensorflow-notebook-image/start-notebook.sh ./
# Then edit this script and add the argument --ip=0.0.0.0 to the jupyter
# command. To put the script into our serve-model image, open the Dockerfile
# and add the following line BEFORE the 'WORKDIR /microservice' line:
#   COPY start-notebook.sh /usr/local/bin/start-notebook.sh

# Also copy again the scripts and data from the notebooks directory
# (the one inside the build dir):
cp notebooks/IssueSummarization.py ./
cp notebooks/Training.ipynb ./

# Then correct the seldon requirements to make them compatible with the
# current pandas version: edit seldon_requirements.txt and change the line
# with numpy to just 'numpy' without any version pin.

# Build and push the serving image to our registry.
# Push both :latest and :0.1 — the deployment step below references the
# :0.1 tag, which would otherwise never have been pushed.
aws ecr create-repository --repository-name github-issue-summarization --region us-east-1
docker build --force-rm=true -t $ACCOUNTID.dkr.ecr.us-east-1.amazonaws.com/github-issue-summarization:latest .
docker tag $ACCOUNTID.dkr.ecr.us-east-1.amazonaws.com/github-issue-summarization:latest $ACCOUNTID.dkr.ecr.us-east-1.amazonaws.com/github-issue-summarization:0.1
docker push $ACCOUNTID.dkr.ecr.us-east-1.amazonaws.com/github-issue-summarization:latest
docker push $ACCOUNTID.dkr.ecr.us-east-1.amazonaws.com/github-issue-summarization:0.1
# Serve the model.
# Go back to the kubeflow_ks_app directory first.
ks generate seldon seldon --name=seldon
ks apply default -c seldon

# Ensure the seldon cluster manager is up and running
kubectl get pods -n "${NAMESPACE}" | grep seldon-cluster-manager

# Deploy the trained docker image as a deployment
ks generate seldon-serve-simple issue-summarization-model-serving \
  --name=issue-summarization \
  --image=$ACCOUNTID.dkr.ecr.us-east-1.amazonaws.com/github-issue-summarization:0.1 \
  --replicas=3
ks apply default -c issue-summarization-model-serving

# Check that we can access the model API (forwards localhost:8081 -> ambassador)
kubectl port-forward svc/ambassador -n "${NAMESPACE}" 8081:80

# Try to predict a summary for an issue
curl -X POST -H 'Content-Type: application/json' -d '{"data":{"ndarray":[[".pyenv/versions/2.7.13/envs/ENV2/lib/python2.7/site-packages/keystoneauth1/adapter.py:136: UserWarning: Using keystoneclient sessions has been deprecated. Please update your software to use keystoneauth1. warnings.warn(Using keystoneclient sessions has been deprecated. Determining IP Address to use with a ping test. Checking ... IP to be used is: INFO: Connecting to Instance at IP: Warning: Identity file gvonlasz not accessible: No such file or directory."]]}}' http://localhost:8081/seldon/issue-summarization/api/v0.1/predictions
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment