# GitHub Gist by @tedhtchang (last active October 4, 2023 22:37)
# Gist: tedhtchang/a5205a346bf7b1ab50f0633e792e4131
# File: local_interactive_aw.yaml
---
# AppWrapper "hfgputest-1" (namespace "default").
#
# Bundles a TLS-enabled Ray cluster together with the objects it needs, as
# four GenericItems:
#   1. RayCluster  hfgputest-1
#   2. Ingress     ray-dashboard-hfgputest-1  -> hfgputest-1-head-svc:8265
#   3. Ingress     rayclient-hfgputest-1      -> hfgputest-1-head-svc:10001
#   4. Secret      ca-secret-hfgputest-1      (CA cert + key used by the
#                                              create-cert init containers)
#
# NOTE(review): the nesting below was reconstructed from a copy of this
# manifest whose indentation had been lost; it follows the AppWrapper,
# RayCluster, Pod, Ingress and Secret schemas. Diff against a known-good
# copy before applying to a cluster.
apiVersion: workload.codeflare.dev/v1beta1
kind: AppWrapper
metadata:
  name: hfgputest-1
  namespace: default
spec:
  priority: 9
  resources:
    GenericItems:
      # ---------------------------------------------------------------
      # Item 1: the Ray cluster.
      # ---------------------------------------------------------------
      # custompodresources has one entry per pod shape; the values mirror
      # the head (2 CPU / 8G) and worker (1 CPU / 4G) container resources
      # declared in the template below.
      - custompodresources:
          - limits:
              cpu: 2
              memory: 8G
              nvidia.com/gpu: 0
            replicas: 1
            requests:
              cpu: 2
              memory: 8G
              nvidia.com/gpu: 0
          - limits:
              cpu: 1
              memory: 4G
              nvidia.com/gpu: 0
            replicas: 1
            requests:
              cpu: 1
              memory: 4G
              nvidia.com/gpu: 0
        generictemplate:
          apiVersion: ray.io/v1alpha1
          kind: RayCluster
          metadata:
            labels:
              appwrapper.mcad.ibm.com: hfgputest-1
              controller-tools.k8s.io: '1.0'
            name: hfgputest-1
            namespace: default
          spec:
            autoscalerOptions:
              idleTimeoutSeconds: 60
              imagePullPolicy: Always
              resources:
                limits:
                  cpu: 500m
                  memory: 512Mi
                requests:
                  cpu: 500m
                  memory: 512Mi
              upscalingMode: Default
            enableInTreeAutoscaling: false
            headGroupSpec:
              rayStartParams:
                block: 'true'
                dashboard-host: '0.0.0.0'
                num-gpus: '0'
              serviceType: ClusterIP
              template:
                spec:
                  containers:
                    - env:
                        - name: MY_POD_IP
                          valueFrom:
                            fieldRef:
                              fieldPath: status.podIP
                        # TLS settings: the files referenced here are
                        # written into the server-cert volume by the
                        # create-cert init container below.
                        - name: RAY_USE_TLS
                          value: '1'
                        - name: RAY_TLS_SERVER_CERT
                          value: /home/ray/workspace/tls/server.crt
                        - name: RAY_TLS_SERVER_KEY
                          value: /home/ray/workspace/tls/server.key
                        - name: RAY_TLS_CA_CERT
                          value: /home/ray/workspace/tls/ca.crt
                      image: quay.io/project-codeflare/ray:2.5.0-py38-cu116
                      imagePullPolicy: Always
                      lifecycle:
                        preStop:
                          exec:
                            command:
                              - /bin/sh
                              - -c
                              - ray stop
                      name: ray-head
                      ports:
                        - containerPort: 6379
                          name: gcs
                        - containerPort: 8265
                          name: dashboard
                        - containerPort: 10001
                          name: client
                      resources:
                        limits:
                          cpu: 2
                          memory: 8G
                          nvidia.com/gpu: 0
                        requests:
                          cpu: 2
                          memory: 8G
                          nvidia.com/gpu: 0
                      volumeMounts:
                        - mountPath: /home/ray/workspace/ca
                          name: ca-vol
                          readOnly: true
                        - mountPath: /home/ray/workspace/tls
                          name: server-cert
                          readOnly: true
                  imagePullSecrets: []
                  initContainers:
                    # Generates a server key + CSR, copies the CA material
                    # from ca-vol into the tls directory, and signs the CSR
                    # with that CA. DNS.5 adds the external rayclient host
                    # name to the certificate's subjectAltName list.
                    # Folded scalar (>-): the line breaks below become
                    # single spaces, yielding one shell command line; the
                    # "\n" sequences stay literal and are expanded by printf.
                    - command:
                        - sh
                        - -c
                        - >-
                          cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048
                          -keyout server.key -out server.csr -subj '/CN=ray-head' && printf
                          "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName
                          = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3
                          = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)\nDNS.5
                          = rayclient-hfgputest-1-$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).cataract1.fyre.ibm.com">./domain.ext
                          && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt
                          -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial
                          -extfile domain.ext
                      image: rayproject/ray:2.5.0
                      name: create-cert
                      volumeMounts:
                        - mountPath: /home/ray/workspace/ca
                          name: ca-vol
                          readOnly: true
                        # Writable here so the generated key/cert can be
                        # stored; the main container mounts it read-only.
                        - mountPath: /home/ray/workspace/tls
                          name: server-cert
                          readOnly: false
                  volumes:
                    - name: ca-vol
                      secret:
                        secretName: ca-secret-hfgputest-1
                        # `optional` is a field of the secret volume
                        # source, so it lives under `secret:`.
                        optional: false
                    - emptyDir: {}
                      name: server-cert
            rayVersion: '2.5.0'
            workerGroupSpecs:
              - groupName: small-group-hfgputest-1
                maxReplicas: 1
                minReplicas: 1
                rayStartParams:
                  block: 'true'
                  num-gpus: '0'
                replicas: 1
                template:
                  metadata:
                    annotations:
                      key: value
                    labels:
                      key: value
                  spec:
                    containers:
                      - env:
                          - name: MY_POD_IP
                            valueFrom:
                              fieldRef:
                                fieldPath: status.podIP
                          # Same TLS file layout as the head container.
                          - name: RAY_USE_TLS
                            value: '1'
                          - name: RAY_TLS_SERVER_CERT
                            value: /home/ray/workspace/tls/server.crt
                          - name: RAY_TLS_SERVER_KEY
                            value: /home/ray/workspace/tls/server.key
                          - name: RAY_TLS_CA_CERT
                            value: /home/ray/workspace/tls/ca.crt
                        image: quay.io/project-codeflare/ray:2.5.0-py38-cu116
                        lifecycle:
                          preStop:
                            exec:
                              command:
                                - /bin/sh
                                - -c
                                - ray stop
                        name: machine-learning
                        resources:
                          limits:
                            cpu: 1
                            memory: 4G
                            nvidia.com/gpu: 0
                          requests:
                            cpu: 1
                            memory: 4G
                            nvidia.com/gpu: 0
                        volumeMounts:
                          - mountPath: /home/ray/workspace/ca
                            name: ca-vol
                            readOnly: true
                          - mountPath: /home/ray/workspace/tls
                            name: server-cert
                            readOnly: true
                    imagePullSecrets: []
                    initContainers:
                      # Blocks until the $RAY_IP service name resolves in
                      # this pod's namespace, polling every 2 seconds.
                      - command:
                          - sh
                          - -c
                          - >-
                            until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local;
                            do echo waiting for myservice; sleep 2; done
                        image: busybox:1.28
                        name: init-myservice
                      # Same certificate generation as on the head pod,
                      # without the external rayclient host (no DNS.5).
                      - command:
                          - sh
                          - -c
                          - >-
                            cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048
                            -keyout server.key -out server.csr -subj '/CN=ray-head' && printf
                            "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName
                            = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3
                            = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)">./domain.ext
                            && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt
                            -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial
                            -extfile domain.ext
                        image: rayproject/ray:2.5.0
                        name: create-cert
                        volumeMounts:
                          - mountPath: /home/ray/workspace/ca
                            name: ca-vol
                            readOnly: true
                          - mountPath: /home/ray/workspace/tls
                            name: server-cert
                            readOnly: false
                    volumes:
                      - name: ca-vol
                        secret:
                          secretName: ca-secret-hfgputest-1
                          # `optional` is a field of the secret volume
                          # source, so it lives under `secret:`.
                          optional: false
                      - emptyDir: {}
                        name: server-cert
        replicas: 1
      # ---------------------------------------------------------------
      # Item 2: Ingress exposing the Ray dashboard (head service, 8265).
      # ---------------------------------------------------------------
      - generictemplate:
          apiVersion: networking.k8s.io/v1
          kind: Ingress
          metadata:
            name: ray-dashboard-hfgputest-1
            namespace: default
          spec:
            rules:
              # NOTE(review): this host says "raytest" while every other
              # name in the file uses "hfgputest-1" — confirm it is the
              # intended DNS name.
              - host: ray-dashboard-raytest.cataract1.fyre.ibm.com
                http:
                  paths:
                    - backend:
                        service:
                          name: hfgputest-1-head-svc
                          port:
                            number: 8265
                      path: /
                      pathType: Prefix
        replicas: 1
      # ---------------------------------------------------------------
      # Item 3: Ingress exposing the Ray client port (head service, 10001)
      # through ingress-nginx with TLS passthrough.
      # ---------------------------------------------------------------
      - generictemplate:
          apiVersion: networking.k8s.io/v1
          kind: Ingress
          metadata:
            annotations:
              kubernetes.io/ingress.class: nginx
              nginx.ingress.kubernetes.io/ssl-passthrough: 'true'
              nginx.ingress.kubernetes.io/backend-protocol: GRPC
            labels:
              odh-ray-cluster-service: hfgputest-1-head-svc
            name: rayclient-hfgputest-1
            namespace: default
          spec:
            rules:
              # This host matches the DNS.5 entry the head pod's
              # create-cert init container adds when run in "default".
              - host: rayclient-hfgputest-1-default.cataract1.fyre.ibm.com
                http:
                  paths:
                    - backend:
                        service:
                          name: hfgputest-1-head-svc
                          port:
                            number: 10001
                      # Disabled keys, kept as found in the original:
                      # tls:
                      #   termination: passthrough
                      path: /
                      pathType: Prefix
        replicas: 1
      # ---------------------------------------------------------------
      # Item 4: CA certificate and key, mounted as ca-vol by the
      # create-cert init containers to sign per-pod server certificates.
      # ---------------------------------------------------------------
      # NOTE(review): ca.key is a CA *private key* embedded in the
      # manifest. Anyone who can read this file can mint certificates the
      # cluster trusts; treat the key as compromised if the file has been
      # shared, rotate it, and source it from a secret store instead.
      # NOTE(review): the decoded ca.crt appears to be valid only from
      # 2023-08-21 to 2024-08-21 — confirm and regenerate if expired.
      # NOTE(review): the ca.key value was found wrapped across two lines
      # and has been rejoined into one scalar (a folded line break would
      # insert a space and break base64 decoding) — verify that it decodes.
      - generictemplate:
          apiVersion: v1
          data:
            ca.crt: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUNzRENDQVppZ0F3SUJBZ0lVVS9qKzhEeThrMnFwbFZnSk5GTEpRWDZYYVJzd0RRWUpLb1pJaHZjTkFRRUwKQlFBd0VqRVFNQTRHQTFVRUF3d0hjbTl2ZEMxallUQWVGdzB5TXpBNE1qRXhOekkwTXpsYUZ3MHlOREE0TWpFeApOekkwTXpsYU1CSXhFREFPQmdOVkJBTU1CM0p2YjNRdFkyRXdnZ0VpTUEwR0NTcUdTSWIzRFFFQkFRVUFBNElCCkR3QXdnZ0VLQW9JQkFRRGxkTnpRbU9SaUw2Mk13RlIyTERFN0lBejRlMzVEZG5ZdVNMNEJqdVRWeUlLRkw3c3YKbVh1UWFwbmNsWVhDMERHUktTL29QZnI0Qjl4Q0M3c1JBSW4waXpVUzlSbEU3UzE4NGhDTTZpYkFmSGR4MGF3dgp5QjJ6T0RoMkVaaFBPVUpCZzE1TEc1MEx2VnBGME1aWVFUc2luOW4vR2N2SytjL3d4WVNpTno1MnlObW5yN3F6CjNNbzFBd3UxZnRzWXF4aG5rUnRVUEdiRUJpL2QrbW1lbjd2VE1FZ0tYUiswM2ZuZk1aZWNiQms5Nm02RHdlZWsKV2syVGY2NTFVa0ZRcW44WkFXQUZCcVEzZnBrMnVZaktYdVB4b3JyakJHVEFJK2dnMlgxaHJXN0dnL3dSVXNocQpha0l0RWZjU0pRZmtWUWh0SHhYODBlajRoUXExNWhlYlZHUnZBZ01CQUFFd0RRWUpLb1pJaHZjTkFRRUxCUUFECmdnRUJBTi91NW9tcmNKQWZZTHVUZTdHVEtsVWdRSUJ3dTNXTjFFNFE0RXJKUm5RSDVzWjRSUW90MUlscG44MVAKVXZrMyt0cXRPcXlZNDA3MTFhZEg0bGVlVThXVUtVL3dweUtJVWNCdDlUMFRSZ0tXTGlIZ3FIZzZFdzdtdUNIOAp3K3ZvODRkbHRtaDhTMFhwK1VhbmpQQ0hRTlBvSGR4SVlNeXBJYTZ0dE92SjVUNm5TTHhGSFl6K3VPejJFYzR5ClZSRjNoQi9rTTQ0ajV5VnBvY2RUcnFjNlVFamprbGw1K3kwUDhWWEt0QXl4bFNNN05FWVpaRTBlbThQNDRrNXoKejlaUFVUM3NMd1dsZXlpRXA0ZGNFdURGSE41N0xnK0RDSUJKSWs3YVRhQlJHSGtEaHZ0QjM1UkZ3YVN3Um1ZZgpyME5qeVdUSCs2dkpTeE43SGxYMjdkcUhUL1k9Ci0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0K
            ca.key: LS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0tCk1JSUV2UUlCQURBTkJna3Foa2lHOXcwQkFRRUZBQVNDQktjd2dnU2pBZ0VBQW9JQkFRRGxkTnpRbU9SaUw2Mk0Kd0ZSMkxERTdJQXo0ZTM1RGRuWXVTTDRCanVUVnlJS0ZMN3N2bVh1UWFwbmNsWVhDMERHUktTL29QZnI0Qjl4QwpDN3NSQUluMGl6VVM5UmxFN1MxODRoQ002aWJBZkhkeDBhd3Z5QjJ6T0RoMkVaaFBPVUpCZzE1TEc1MEx2VnBGCjBNWllRVHNpbjluL0djdksrYy93eFlTaU56NTJ5Tm1ucjdxejNNbzFBd3UxZnRzWXF4aG5rUnRVUEdiRUJpL2QKK21tZW43dlRNRWdLWFIrMDNmbmZNWmVjYkJrOTZtNkR3ZWVrV2syVGY2NTFVa0ZRcW44WkFXQUZCcVEzZnBrMgp1WWpLWHVQeG9ycmpCR1RBSStnZzJYMWhyVzdHZy93UlVzaHFha0l0RWZjU0pRZmtWUWh0SHhYODBlajRoUXExCjVoZWJWR1J2QWdNQkFBRUNnZ0VBVGpkYjNpK0FDb2crSExDakdmVk9ndDBXRHMzcDZJMUZFOW53Q3cvdXFZRkUKSEtwOG9Ha3RXYms4NFppR2xEWHI3NU9lcFFnYnVOalZHQjV2a2dnZy9wOUZaRVR3YkRKOVF3aThvNHhkMFZlYwpUOHd0YWN4SmM0SkNuTnk2a3FidUV1NzVBWmxVRzJCY21WZmxoTWdYcEdIaEtiaGRZNzlzUnFMTjU0MzFXUC9SCkRtWkZFVDNpVlJ1ZlE3dHdXSUFzbS9DVWsza2Z4T1g0ZUhXMFVtcTNIR0VJeDhlZVpuMkhtM0tVeVBFZTZ0TDkKR2kxeE52V2twY2VPNGpsSjl4c1Y4UTlpY1dseitMYmNXelZDcVB3dThSSXhsM1BSZlp4YzF1ZUJkb0txMjNhYgpMdmp5cDJRTUdaYXNMbmhBZnEzYkR0RTZPNmxFdVp3MnByMXNPTERFd1FLQmdRRDI2SGtZMGRBemEvZytwcjVMClpUUnlsWmU3Szl1QUo3NmJsUUNLZE1WZFdROFJuZkxZOG1PQ0tud3NXazAyN2NwWDZGWTlNNzVFdUlGdjB1ZGYKdTdxbkR4SlJaaE9uUWMvaTRqMHFVcGdxZm5aUGFLQ3lHUk9wRXZLb3h0Slk0WHcvK0J5cGdERk9QYW9NUnpLdgo2eGk2U2tCS1ZvVFRScTl5VCs3RDNxaU55UUtCZ1FEdDUrRHNVL2RmRHhyTE8wWmIzQ2tybmdidi9tc0tIR3VMCnI0alVIWWlyZm1RVFdVdm9RK3hOaGt2bVBwMTgxSktYRXpXaGZ6NmZoMHJVcnovNXU3K3o5cDhJQXVBS28rQnUKNUdWS28vbjRtVmZBVGJwZGJOUkRhczg0WG55bFA2SWhKcStkdmdFc3pOdmRXWEw1YklqaENVbzV2WFJjMkwxQQp1VmZIdWZTY2R3S0JnRUorNVc0cnczaTEwVXQrT0syeGVlaUtKUzBCYU1JZzhoS1FsRzYxZUJubHpFc2dVTERnCnhxL2RwVWVFb2xyeXU1WGxPcVNNOU9jcEVhTCtGOVlPaHZ4SzhOSC9QYmZEMzZybzV6UW92bFpqYkpXQ1JoVHcKNy95TzM4OVlqSG5uY0VFQ0h5WC9Ja2hDRHpQelVORlQ3WHJ1V0ZZczZ1VERtcWljb2w4WjdpY1JBb0dBRjlwMgovOXNQWTNXVlB5bExUYXlPYURRcEIra2ZmRjBMSDI5VExjbGtTYVdialNFWDdrclpKNUMralhCYnhBZGdSMHVDCnhoV0hCdUcrekFKaVRMelpaTWJtZlJ3a3NJeHIxVktMSVdlOWlZSitmT2FPT0EzUVhvNFlCdFBhZFlzaXkyWWcKbDVWVWN0UjdldjhiK3d3NTBTNW9SYy9NR2Y4VlFxY1N1UkRaQ05NQ2dZRUE5aGZlYXJrSWVncjg1S3BqS3FFMwpobVo3dTFqYnU2ZWxwUHJaY0R3cDFoRGFWN2JDMGJWa1BtNllBdGVVdDdYTnhpdk92R3dVZGtWV3dFR25CUXExCnJaTlQrOGhtNHBBMldGbDBwOEd0Y1FpVjFlcnFsZHBRaGpBVFVmNGpuNnMwZEhNNzNUNHkwYkhkY3o4WWQzN1YKdTl6Vmg5aXM4ZFZCUmVWczVvdnFFTmc9Ci0tLS0tRU5EIFBSSVZBVEUgS0VZLS0tLS0K
          kind: Secret
          metadata:
            labels:
              odh-ray-cluster-service: hfgputest-1-head-svc
            name: ca-secret-hfgputest-1
            namespace: default
        replicas: 1
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment