Skip to content

Instantly share code, notes, and snippets.

@timfanda35
Last active April 25, 2018 09:31
Show Gist options
  • Save timfanda35/c5c32372cf9f95187d3515c4fbf0e636 to your computer and use it in GitHub Desktop.
Save timfanda35/c5c32372cf9f95187d3515c4fbf0e636 to your computer and use it in GitHub Desktop.
Snippet for installing kubeflow on GKE
#!/usr/bin/env bash
#
# Install kubeflow on a new GKE cluster.
#
# Requirement
#
# * gcloud SDK https://cloud.google.com/sdk/downloads
# * kubectl https://kubernetes.io/docs/tasks/tools/install-kubectl/
# * ksonnet https://ksonnet.io/
#
# * Request GPU Quota in Cloud Console
#

# Stop at the first failing command, unset variable or failing pipeline stage,
# so a failed step never lets later steps run against the wrong project or
# cluster.
set -euo pipefail

#
# Setting common parameters
#
# Change here!!!
#
PROJECT_ID="CHANGE_TO_YOUR_PROJECT_ID"
ZONE="asia-east1-b"
K8S_ADMIN_USER="CHANGE_TO_YOUR_USER_ACCOUNT"

# Refuse to run while the placeholders above are still in place.
if [[ "${PROJECT_ID}" == CHANGE_TO_YOUR_* \
  || "${K8S_ADMIN_USER}" == CHANGE_TO_YOUR_* ]]; then
  printf 'error: set PROJECT_ID and K8S_ADMIN_USER in %s before running it\n' \
    "${0##*/}" >&2
  exit 1
fi

# Store the project and zone in the active gcloud configuration; the gcloud
# commands further down do not pass them explicitly.
gcloud config set project "${PROJECT_ID}"
gcloud config set compute/zone "${ZONE}"
# Create Kubernetes cluster
#
# * version >= 1.9
# * vCPU >= 2
#
CLUSTER_NAME="cluster-1"
CLUSTER_VERSION="1.9.6-gke.1"
# Kubernetes version without the "-gke.N" suffix, used for kubeflow (ksonnet
# API spec). Derived from CLUSTER_VERSION so the two values cannot drift apart.
K8S_API_VERSION="${CLUSTER_VERSION%%-*}"
MACHINE_TYPE="n1-standard-2"

# Abort on failure: every later step assumes this cluster exists.
gcloud container clusters create "${CLUSTER_NAME}" \
  --cluster-version "${CLUSTER_VERSION}" \
  --machine-type "${MACHINE_TYPE}" \
  || {
    printf 'error: failed to create cluster %s\n' "${CLUSTER_NAME}" >&2
    exit 1
  }
# Setup kubectl context
#
# Abort if the credentials cannot be fetched: otherwise the kubectl commands
# below would run against whatever context kubectl currently points at.
gcloud container clusters get-credentials "${CLUSTER_NAME}" \
  || {
    printf 'error: cannot get credentials for cluster %s\n' "${CLUSTER_NAME}" >&2
    exit 1
  }

# Setup RBAC
#
# Bind the cluster-admin role to the configured user account.
kubectl create clusterrolebinding default-admin \
  --clusterrole=cluster-admin \
  --user="${K8S_ADMIN_USER}"

# Create namespace for kubeflow deployment
#
NAMESPACE="kubeflow"
kubectl create namespace "${NAMESPACE}"
################################################################
#
# Add GPU Support
#
# https://cloud.google.com/kubernetes-engine/docs/concepts/gpus
#

# GPU node-pool settings
#
# Supported GPU_TYPE values:
#
# * nvidia-tesla-k80
# * nvidia-tesla-p100
#
POOL_NAME="gpu-k80-node-pool"
GPU_TYPE="nvidia-tesla-k80"
GPU_COUNT_PER_NODE="1"

# Deploy GPU driver plugin
#
gpu_driver_manifest="https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/k8s-1.9/nvidia-driver-installer/cos/daemonset-preloaded.yaml"
kubectl apply -f "${gpu_driver_manifest}"

# Create GPU node-pool
#
# The flags are collected in an array so each one sits on its own line.
node_pool_flags=(
  --cluster "${CLUSTER_NAME}"
  --accelerator "type=${GPU_TYPE},count=${GPU_COUNT_PER_NODE}"
  --num-nodes 1
  --min-nodes 0
  --max-nodes 3
  --enable-autoscaling
)
gcloud beta container node-pools create "${POOL_NAME}" "${node_pool_flags[@]}"
################################################################
#
# Install kubeflow
#
# https://github.com/kubeflow/kubeflow
#

# Both values are assigned earlier in this script; fail with a clear message
# instead of passing an empty version or namespace to ks.
: "${K8S_API_VERSION:?K8S_API_VERSION must be set}"
: "${NAMESPACE:?NAMESPACE must be set}"

# Create ksonnet project
#
APP_NAME="my-kubeflow"
ks init "${APP_NAME}" --api-spec="version:v${K8S_API_VERSION}" \
  || {
    printf 'error: ks init failed for %s\n' "${APP_NAME}" >&2
    exit 1
  }
# Every following ks command must run inside the app directory, so stop here
# rather than run them from the wrong place.
cd "${APP_NAME}" \
  || {
    printf 'error: cannot cd to %s\n' "${APP_NAME}" >&2
    exit 1
  }

# Download kubeflow packages
#
KUBEFLOW_VERSION="v0.1.2"
ks registry add kubeflow "github.com/kubeflow/kubeflow/tree/${KUBEFLOW_VERSION}/kubeflow"
ks pkg install "kubeflow/core@${KUBEFLOW_VERSION}"
ks pkg install "kubeflow/tf-serving@${KUBEFLOW_VERSION}"
ks pkg install "kubeflow/tf-job@${KUBEFLOW_VERSION}"
ks generate core kubeflow-core --name=kubeflow-core

# Setting GKE environment
#
KF_ENV="cloud"
ks env add "${KF_ENV}" \
  --namespace "${NAMESPACE}" \
  --api-spec="version:v${K8S_API_VERSION}"
ks param set kubeflow-core cloud gke --env="${KF_ENV}"
ks env set "${KF_ENV}"

# Deploy kubeflow
ks apply "${KF_ENV}" -c kubeflow-core
# kubectl apply -f https://gist.githubusercontent.com/timfanda35/c5c32372cf9f95187d3515c4fbf0e636/raw/8ea54837a21f69eb46e5b7aab414a49ee2c2049e/tfjob_cpu.yaml
#
# TFJob smoke test on CPU: one MASTER replica running the sample image.
apiVersion: "kubeflow.org/v1alpha1"
kind: "TFJob"
metadata:
  name: "tf-smoke-cpu"
spec:
  replicaSpecs:
    - tfReplicaType: MASTER
      template:
        spec:
          containers:
            - image: gcr.io/tf-on-k8s-dogfood/tf_sample:dc944ff
              name: tensorflow
          restartPolicy: OnFailure
# kubectl apply -f https://gist.githubusercontent.com/timfanda35/c5c32372cf9f95187d3515c4fbf0e636/raw/a7e2324db214857dae4b0fc0f5227fb4257136f4/tfjob_gpu.yaml
#
# TFJob smoke test on GPU: one MASTER replica running the GPU sample image,
# with a limit of one nvidia.com/gpu on the container.
apiVersion: "kubeflow.org/v1alpha1"
kind: "TFJob"
metadata:
  name: "tf-smoke-gpu"
spec:
  replicaSpecs:
    - tfReplicaType: MASTER
      template:
        spec:
          containers:
            - image: gcr.io/tf-on-k8s-dogfood/tf_sample_gpu:dc944ff
              name: tensorflow
              resources:
                limits:
                  nvidia.com/gpu: 1
          restartPolicy: OnFailure
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment