- Save as
sa-tiller.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: tiller
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
Events: | |
Type Reason Age From Message | |
---- ------ ---- ---- ------- | |
Normal Scheduled 55s default-scheduler Successfully assigned kube-system/vpc-resource-controller-5ddd7b84dd-4kgls to ip-192-168-38-2.us-west-2.compute.internal | |
Warning Evicted 55s kubelet, ip-192-168-38-2.us-west-2.compute.internal The node had condition: [DiskPressure]. | |
➜ /tmp | |
kubectl get pods -n kube-system | |
NAME READY STATUS RESTARTS AGE |
``` | |
apiVersion: v1 | |
data: | |
hostfile: | | |
benchmark-eks-resnet-gpu-2-worker-0 slots=1 | |
benchmark-eks-resnet-gpu-2-worker-1 slots=1 | |
kubexec.sh: | | |
#!/bin/sh | |
set -x | |
POD_NAME=$1 |
``` | |
Step 6/12 : RUN apt install -y python python-pip && apt install -y python3 python3-pip && rm -r /usr/lib/python*/ensurepip && pip install --upgrade pip setuptools && rm -r /root/.cache && rm -rf /var/cache/apt/* | |
---> Running in 74256170f862 | |
WARNING: apt does not have a stable CLI interface. Use with caution in scripts. | |
Reading package lists... | |
Building dependency tree... | |
Reading state information... | |
The following additional packages will be installed: |
1T data | |
Kubernetes - Multiple AZ. | |
+---------+--------------------+-----------------+-----------------+ | |
|queryName|medianRuntimeSeconds|minRuntimeSeconds|maxRuntimeSeconds| | |
+---------+--------------------+-----------------+-----------------+ | |
| q64-v2.4| 991.0| 953| 1081| | |
| q70-v2.4| 83.0| 72| 105| | |
| q82-v2.4| 83.0| 77| 90| |
apiVersion: "serving.kubeflow.org/v1alpha2"
kind: "KFService"
metadata:
  name: "mnist-s3"
spec:
  default:
    predictor:
      serviceAccountName: kfserving-sa
      tensorflow:
        storageUri: "s3://kfserving-eks-demo/flowers"
apiVersion: v1
kind: Secret
metadata:
  name: mysecret
  annotations:
    serving.kubeflow.org/s3-endpoint: s3.us-west-2.amazonaws.com
    serving.kubeflow.org/s3-usehttps: "1"
    serving.kubeflow.org/s3-verifyssl: "1"
    serving.kubeflow.org/s3-region: us-west-2
type: Opaque
1. Need to copy hadoop-aws.jar | |
``` | |
19/09/07 18:24:48 WARN FileStreamSink: Error while looking for metadata directory. | |
Exception in thread "main" java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.hadoop.fs.s3a.S3AFileSystem not found | |
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2592) | |
at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:3320) | |
at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3352) | |
at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:124) | |
at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3403) |
# Print an error message to stderr and terminate with a failure status.
# Arguments: $1 - message to print; written verbatim followed by a newline
# Outputs:   the message on stderr; nothing on stdout
# Exits:     always exits the current shell with status 1
function fail {
  # Quote the argument and use printf so whitespace, glob characters and
  # leading dashes in the message are printed literally.
  printf '%s\n' "$1" >&2
  exit 1
}
# Only retry on `Unable to locate credentials` error | |
function retry { | |
local i=1 | |
local max=5 | |
local delay=10 |
sa-tiller.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: tiller
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
➜ oidc kubectl describe pod fairing-job-kd692-klq76 | |
Name: fairing-job-kd692-klq76 | |
Namespace: default | |
Priority: 0 | |
Node: ip-192-168-65-220.us-west-2.compute.internal/192.168.65.220 | |
Start Time: Thu, 08 Aug 2019 14:34:01 -0700 | |
Labels: controller-uid=3d1bbad5-ba24-11e9-9c70-02ddd37f1182 | |
fairing-deployer=job | |
fairing-id=3cdc1c42-ba24-11e9-9126-88e9fe523941 | |
job-name=fairing-job-kd692 |