Last active
April 25, 2018 09:31
-
-
Save timfanda35/c5c32372cf9f95187d3515c4fbf0e636 to your computer and use it in GitHub Desktop.
Snippet for installing Kubeflow on GKE.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Create a GKE cluster with an autoscaling GPU node pool and deploy
# Kubeflow onto it with ksonnet.
#
# Requirements
#
#   * gcloud SDK  https://cloud.google.com/sdk/downloads
#   * kubectl     https://kubernetes.io/docs/tasks/tools/install-kubectl/
#   * ksonnet     https://ksonnet.io/
#
#   * Request GPU quota in the Cloud Console
#
# Usage: edit the values in the "Change here!!!" section, then run the script.

# Stop at the first failing command, unset variable, or failed pipeline stage
# so that later steps never run against a cluster that was not created.
set -euo pipefail

# Print a message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

#
# Setting common parameters
#
# Change here!!!
#
readonly PROJECT_ID="CHANGE_TO_YOUR_PROJECT_ID"
readonly ZONE="asia-east1-b"
readonly K8S_ADMIN_USER="CHANGE_TO_YOUR_USER_ACCOUNT"

# Fail fast instead of handing the placeholder values to gcloud/kubectl.
if [[ "${PROJECT_ID}" == CHANGE_TO_YOUR_* ]]; then
  die "PROJECT_ID still holds its placeholder value; edit the script first"
fi
if [[ "${K8S_ADMIN_USER}" == CHANGE_TO_YOUR_* ]]; then
  die "K8S_ADMIN_USER still holds its placeholder value; edit the script first"
fi

# Every step below shells out to one of these tools.
for tool in gcloud kubectl ks; do
  command -v "${tool}" >/dev/null || die "required tool not found on PATH: ${tool}"
done

gcloud config set project "${PROJECT_ID}"
gcloud config set compute/zone "${ZONE}"

#
# Create Kubernetes cluster
#
#   * version >= 1.9
#   * vCPU >= 2
#
readonly CLUSTER_NAME="cluster-1"
readonly CLUSTER_VERSION="1.9.6-gke.1"
readonly K8S_API_VERSION="1.9.6" # for kubeflow
readonly MACHINE_TYPE="n1-standard-2"

gcloud container clusters create "${CLUSTER_NAME}" \
  --cluster-version "${CLUSTER_VERSION}" \
  --machine-type "${MACHINE_TYPE}"

#
# Setup kubectl context
#
gcloud container clusters get-credentials "${CLUSTER_NAME}"

#
# Setup RBAC
#
kubectl create clusterrolebinding default-admin \
  --clusterrole=cluster-admin \
  --user="${K8S_ADMIN_USER}"

#
# Create namespace for kubeflow deployment
#
readonly NAMESPACE="kubeflow"
kubectl create namespace "${NAMESPACE}"

################################################################
#
# Add GPU Support
#
# https://cloud.google.com/kubernetes-engine/docs/concepts/gpus
#

#
# Deploy GPU driver plugin
#
kubectl apply -f https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/k8s-1.9/nvidia-driver-installer/cos/daemonset-preloaded.yaml

#
# Create GPU node-pool
#
# GPU_TYPE
#
#   * nvidia-tesla-k80
#   * nvidia-tesla-p100
#
readonly POOL_NAME="gpu-k80-node-pool"
readonly GPU_TYPE="nvidia-tesla-k80"
readonly GPU_COUNT_PER_NODE="1"

gcloud beta container node-pools create "${POOL_NAME}" \
  --cluster "${CLUSTER_NAME}" \
  --accelerator "type=${GPU_TYPE},count=${GPU_COUNT_PER_NODE}" \
  --num-nodes 1 --min-nodes 0 --max-nodes 3 --enable-autoscaling

################################################################
#
# Install kubeflow
#
# https://github.com/kubeflow/kubeflow
#

#
# Create ksonnet project
#
readonly APP_NAME="my-kubeflow"
ks init "${APP_NAME}" --api-spec="version:v${K8S_API_VERSION}"
# All following ks commands must run inside the ksonnet app directory.
cd -- "${APP_NAME}" || die "cannot cd to ${APP_NAME}"

#
# Download kubeflow packages
#
readonly KUBEFLOW_VERSION="v0.1.2"
ks registry add kubeflow "github.com/kubeflow/kubeflow/tree/${KUBEFLOW_VERSION}/kubeflow"
ks pkg install "kubeflow/core@${KUBEFLOW_VERSION}"
ks pkg install "kubeflow/tf-serving@${KUBEFLOW_VERSION}"
ks pkg install "kubeflow/tf-job@${KUBEFLOW_VERSION}"
ks generate core kubeflow-core --name=kubeflow-core

#
# Setting GKE environment
#
readonly KF_ENV="cloud"
ks env add "${KF_ENV}" \
  --namespace "${NAMESPACE}" \
  --api-spec="version:v${K8S_API_VERSION}"
ks param set kubeflow-core cloud gke --env="${KF_ENV}"
# NOTE(review): this call passes no settings to change and is kept only to
# mirror the original flow. It is unverified whether every ksonnet release
# accepts a flag-less `env set`, so a failure here is reported but not fatal.
ks env set "${KF_ENV}" \
  || printf 'warning: "ks env set %s" failed; continuing\n' "${KF_ENV}" >&2

#
# Deploy kubeflow
#
ks apply "${KF_ENV}" -c kubeflow-core
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CPU smoke-test TFJob: runs the sample TensorFlow image as a single MASTER replica.
#
# Apply with:
# kubectl apply -f https://gist.githubusercontent.com/timfanda35/c5c32372cf9f95187d3515c4fbf0e636/raw/8ea54837a21f69eb46e5b7aab414a49ee2c2049e/tfjob_cpu.yaml
apiVersion: "kubeflow.org/v1alpha1"
kind: "TFJob"
metadata:
  name: "tf-smoke-cpu"
spec:
  replicaSpecs:
    - tfReplicaType: MASTER
      template:
        spec:
          containers:
            - image: gcr.io/tf-on-k8s-dogfood/tf_sample:dc944ff
              name: tensorflow
          # Pod-level setting: sits beside `containers`, not inside a container.
          restartPolicy: OnFailure
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# GPU smoke-test TFJob: runs the sample TensorFlow GPU image as a single MASTER
# replica and requests one NVIDIA GPU for the container.
#
# Apply with:
# kubectl apply -f https://gist.githubusercontent.com/timfanda35/c5c32372cf9f95187d3515c4fbf0e636/raw/a7e2324db214857dae4b0fc0f5227fb4257136f4/tfjob_gpu.yaml
apiVersion: "kubeflow.org/v1alpha1"
kind: "TFJob"
metadata:
  name: "tf-smoke-gpu"
spec:
  replicaSpecs:
    - tfReplicaType: MASTER
      template:
        spec:
          containers:
            - image: gcr.io/tf-on-k8s-dogfood/tf_sample_gpu:dc944ff
              name: tensorflow
              resources:
                limits:
                  # GPU resource limit for this container.
                  nvidia.com/gpu: 1
          # Pod-level setting: sits beside `containers`, not inside a container.
          restartPolicy: OnFailure
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment