Skip to content

Instantly share code, notes, and snippets.

@shrekris-anyscale
Last active November 23, 2022 01:37
Show Gist options
  • Save shrekris-anyscale/9131cee8e454a7f35b2ae24de99192d7 to your computer and use it in GitHub Desktop.
# File name: config.yaml
# ConfigMap holding the redis.conf served to the Redis container below.
# Note: the password here is Ray's well-known default GCS/Redis password,
# not a real secret — do not reuse this pattern for production credentials.
kind: ConfigMap
apiVersion: v1
metadata:
  name: redis-config
  labels:
    app: redis
data:
  # |- keeps line breaks and strips the trailing newline.
  redis.conf: |-
    port 6379
    bind 0.0.0.0
    protected-mode no
    requirepass 5241590000000000
---
# ClusterIP Service exposing Redis on 6379 to in-cluster clients
# (the Ray head reaches it via RAY_REDIS_ADDRESS=redis:6379).
apiVersion: v1
kind: Service
metadata:
  name: redis
  labels:
    app: redis
spec:
  type: ClusterIP
  ports:
    - name: redis
      port: 6379
  selector:
    app: redis
---
# Single-replica Redis Deployment. The config file is projected from the
# redis-config ConfigMap via subPath so only redis.conf is mounted, and the
# server is started against that file explicitly.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: redis
  labels:
    app: redis
spec:
  replicas: 1
  selector:
    matchLabels:
      app: redis
  template:
    metadata:
      labels:
        app: redis
    spec:
      containers:
        - name: redis
          image: redis:5.0.8
          command:
            - "sh"
            - "-c"
            - "redis-server /usr/local/etc/redis/redis.conf"
          ports:
            - containerPort: 6379
          volumeMounts:
            # subPath mounts just redis.conf instead of shadowing the dir.
            - name: config
              mountPath: /usr/local/etc/redis/redis.conf
              subPath: redis.conf
      # volumes is a pod-spec field (sibling of containers), not a
      # container field — the flattened source lost this nesting.
      volumes:
        - name: config
          configMap:
            name: redis-config
---
# KubeRay RayService with GCS fault tolerance (ray.io/ft-enabled) backed by
# the external Redis deployed above. Serves the "sleepy:app" Serve app.
apiVersion: ray.io/v1alpha1
kind: RayService
metadata:
  name: rayservice-sample
  annotations:
    ray.io/ft-enabled: "true"
    kubernetes.io/ingress.class: nginx
spec:
  serviceUnhealthySecondThreshold: 300
  deploymentUnhealthySecondThreshold: 300
  serveConfig:
    importPath: "sleepy:app"
    # runtimeEnv is a YAML string (block scalar) parsed by Ray, not by k8s.
    runtimeEnv: |
      working_dir: "https://github.com/shrekris-anyscale/printing_env/archive/2f2c15c6dd3af8a8d3c3163143049b662bbeb65b.zip"
    deployments:
      - name: Sleeper
        numReplicas: 2
        rayActorOptions:
          # Double-encoded JSON string: Ray expects a serialized dict here.
          # Each replica requires 1 unit of the custom "attractor" resource.
          resources: '"{\"attractor\": 1}"'
  rayClusterConfig:
    rayVersion: '2.0.0'
    headGroupSpec:
      serviceType: LoadBalancer
      enableIngress: true
      replicas: 1
      rayStartParams:
        block: 'true'
        num-cpus: '2'
        object-store-memory: '100000000'
        dashboard-host: '0.0.0.0'
        # Auto-completed as the head pod IP; quoted so YAML treats it as a
        # plain string (KubeRay substitutes the env var at start time).
        node-ip-address: '$MY_POD_IP'
        resources: '"{\"attractor\": 1}"'
      template:
        spec:
          containers:
            - name: ray-head
              image: rayproject/ray:2.0.0
              imagePullPolicy: Always
              env:
                - name: SERVE_REQUEST_PROCESSING_TIMEOUT_S
                  value: "0.751"
                - name: SERVE_PROXY_FORWARD_ATTEMPT_TIMEOUT_S
                  value: "2"
                - name: MY_POD_IP
                  valueFrom:
                    fieldRef:
                      fieldPath: status.podIP
                # Points GCS fault tolerance at the external Redis Service.
                - name: RAY_REDIS_ADDRESS
                  value: "redis:6379"
                - name: SERVE_DEBUG_LOG
                  value: "1"
              resources:
                limits:
                  cpu: 2
                  memory: 2Gi
                requests:
                  cpu: 2
                  memory: 2Gi
              ports:
                - containerPort: 6379
                  name: redis
                - containerPort: 8265
                  name: dashboard
                - containerPort: 10001
                  name: client
                - containerPort: 8000
                  name: serve
    workerGroupSpecs:
      - replicas: 1
        minReplicas: 1
        maxReplicas: 1
        groupName: small-group
        rayStartParams:
          block: 'true'
          node-ip-address: '$MY_POD_IP'
          resources: '"{\"attractor\": 1}"'
        template:
          spec:
            # Block worker startup until the head node's Service resolves.
            initContainers:
              - name: init-myservice
                image: busybox:1.28
                command: ['sh', '-c', "until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"]
            containers:
              - name: machine-learning
                image: rayproject/ray:2.0.0
                imagePullPolicy: Always
                env:
                  - name: SERVE_REQUEST_PROCESSING_TIMEOUT_S
                    value: "0.751"
                  - name: SERVE_PROXY_FORWARD_ATTEMPT_TIMEOUT_S
                    value: "2"
                  - name: RAY_DISABLE_DOCKER_CPU_WARNING
                    value: "1"
                  - name: TYPE
                    value: "worker"
                  - name: CPU_REQUEST
                    valueFrom:
                      resourceFieldRef:
                        containerName: machine-learning
                        resource: requests.cpu
                  - name: CPU_LIMITS
                    valueFrom:
                      resourceFieldRef:
                        containerName: machine-learning
                        resource: limits.cpu
                  - name: MEMORY_LIMITS
                    valueFrom:
                      resourceFieldRef:
                        containerName: machine-learning
                        resource: limits.memory
                  - name: MEMORY_REQUESTS
                    valueFrom:
                      resourceFieldRef:
                        containerName: machine-learning
                        resource: requests.memory
                  - name: MY_POD_NAME
                    valueFrom:
                      fieldRef:
                        fieldPath: metadata.name
                  - name: MY_POD_IP
                    valueFrom:
                      fieldRef:
                        fieldPath: status.podIP
                  - name: SERVE_DEBUG_LOG
                    value: "1"
                ports:
                  - containerPort: 80
                    name: client
                lifecycle:
                  preStop:
                    exec:
                      # Drain Ray cleanly before the pod is killed.
                      command: ["/bin/sh", "-c", "ray stop"]
                resources:
                  limits:
                    cpu: "1"
                    memory: "2Gi"
                  requests:
                    cpu: "500m"
                    memory: "2Gi"
# File name: sleepy.py
from ray import serve
import logging

logger = logging.getLogger("ray.serve")


@serve.deployment(user_config={"time": 15})
class Sleeper:
    """Serve deployment that sleeps on (re)configuration and returns its PID.

    `user_config={"time": 15}` triggers `reconfigure` on deploy and whenever
    the user config changes, making each replica sleep for `time` seconds —
    useful for observing slow-starting replicas during upgrades.
    """

    def __init__(self):
        logger.info("I am initializing!")

    def reconfigure(self, config):
        # Called by Serve with the deployment's user_config dict.
        # Defaults to 15 s if "time" is absent.
        import time
        time.sleep(config.get("time", 15))

    def __call__(self, *args):
        # Handles requests: logs, then returns this replica's process id so
        # callers can tell which replica served them.
        logger.info("pre-import print statement!")
        import os
        logger.info(f"PID {os.getpid()} | I got called with args: {args}")
        return os.getpid()


# Top-level Serve application bound for `importPath: "sleepy:app"`.
app = Sleeper.bind()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment