Created
January 6, 2023 19:09
-
-
Save augray/99ebdcde3d17d0343ea9d62ed14a1730 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
apiVersion: ray.io/v1alpha1
kind: RayCluster
metadata:
  labels:
    controller-tools.k8s.io: "1.0"
  # A unique identifier for the head node and workers of this cluster.
  name: raycluster-complete
spec:
  rayVersion: '2.1.0'
  ###################### headGroupSpec #################################
  # Ray head pod template and specs
  headGroupSpec:
    # Kubernetes Service Type, valid values are 'ClusterIP', 'NodePort' and 'LoadBalancer'
    serviceType: ClusterIP
    # the following params are used to complete the ray start: ray start --head --block --dashboard-host: '0.0.0.0' ...
    rayStartParams:
      dashboard-host: '0.0.0.0'
      block: 'true'
    # pod template
    template:
      metadata:
        # Custom labels. NOTE: To avoid conflicts with KubeRay operator, do not define custom labels starting with `raycluster`.
        # Refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
        labels: {}
      spec:
        containers:
          - name: ray-head
            # image: rayproject/ray:2.1.0
            image: 558717131297.dkr.ecr.us-west-2.amazonaws.com/sematic-dev@sha256:6f089f664f135fe62b9fac3ed0261c0a7b794ba518bc109f025e516555aa9791
            ports:
              - containerPort: 6379
                name: gcs
              - containerPort: 8265
                name: dashboard
              - containerPort: 10001
                name: client
            lifecycle:
              preStop:
                exec:
                  command: ["/bin/sh","-c","ray stop"]
            volumeMounts:
              - mountPath: /tmp/ray
                name: ray-logs
            # The resource requests and limits in this config are too small for production!
            # For an example with more realistic resource configuration, see
            # ray-cluster.autoscaler.large.yaml.
            # It is better to use a few large Ray pods than many small ones.
            # For production, it is ideal to size each Ray pod to take up the
            # entire Kubernetes node on which it is scheduled.
            resources:
              limits:
                cpu: "1"
                memory: "2G"
              requests:
                # For production use-cases, we recommend specifying integer CPU requests and limits.
                # We also recommend setting requests equal to limits for both CPU and memory.
                # For this example, we use a 500m CPU request to accommodate resource-constrained local
                # Kubernetes testing environments such as KinD and minikube.
                cpu: "500m"
                memory: "2G"
        volumes:
          - name: ray-logs
            emptyDir: {}
  workerGroupSpecs:
    # the pod replicas in this group typed worker
    - replicas: 1
      minReplicas: 1
      maxReplicas: 10
      # logical group name, for this called small-group, also can be functional
      groupName: small-group
      # If worker pods need to be added, we can increment the replicas.
      # If worker pods need to be removed, we decrement the replicas, and populate the workersToDelete list.
      # The operator will remove pods from the list until the desired number of replicas is satisfied.
      # If the difference between the current replica count and the desired replicas is greater than the
      # number of entries in workersToDelete, random worker pods will be deleted.
      # scaleStrategy:
      #   workersToDelete:
      #     - raycluster-complete-worker-small-group-bdtwh
      #     - raycluster-complete-worker-small-group-hv457
      #     - raycluster-complete-worker-small-group-k8tj7
      # the following params are used to complete the ray start: ray start --block
      rayStartParams:
        block: 'true'
      # pod template
      template:
        spec:
          containers:
            - name: ray-worker
              # image: rayproject/ray:2.1.0
              image: 558717131297.dkr.ecr.us-west-2.amazonaws.com/sematic-dev@sha256:6f089f664f135fe62b9fac3ed0261c0a7b794ba518bc109f025e516555aa9791
              lifecycle:
                preStop:
                  exec:
                    command: ["/bin/sh","-c","ray stop"]
              # use volumeMounts.Optional.
              # Refer to https://kubernetes.io/docs/concepts/storage/volumes/
              volumeMounts:
                - mountPath: /tmp/ray
                  name: ray-logs
              # The resource requests and limits in this config are too small for production!
              # For an example with more realistic resource configuration, see
              # ray-cluster.autoscaler.large.yaml.
              # It is better to use a few large Ray pods than many small ones.
              # For production, it is ideal to size each Ray pod to take up the
              # entire Kubernetes node on which it is scheduled.
              resources:
                limits:
                  cpu: "1"
                  memory: "1G"
                requests:
                  # For production use-cases, we recommend specifying integer CPU requests and limits.
                  # We also recommend setting requests equal to limits for both CPU and memory.
                  # For this example, we use a 500m CPU request to accommodate resource-constrained local
                  # Kubernetes testing environments such as KinD and minikube.
                  cpu: "500m"
                  # For production use-cases, we recommend allocating at least 8Gb memory for each Ray container.
                  memory: "1G"
          initContainers:
            # the env var $RAY_IP is set by the operator if missing, with the value of the head service name
            - name: init
              image: busybox:1.28
              # Change the cluster postfix if you don't have a default setting
              command: ['sh', '-c', "until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for K8s Service $RAY_IP; sleep 2; done"]
          # use volumes
          # Refer to https://kubernetes.io/docs/concepts/storage/volumes/
          volumes:
            - name: ray-logs
              emptyDir: {}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment