diff --git a/python/ray/serve/api.py b/python/ray/serve/api.py
index 2d08bc9db..3d7ef80d5 100644
--- a/python/ray/serve/api.py
+++ b/python/ray/serve/api.py
@@ -3,6 +3,7 @@ import atexit
import collections
import inspect
import os
+import sys
import time
architkulkarni / benchmark.py
Created November 23, 2020 23:36
Quick benchmarking for Ray Serve backend
import asyncio
import aiohttp
import time
async def fetch_page(session, url):
    async with session.get(url) as response:
        assert response.status == 200
        return await response.read()

async def main():
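    # (The gist preview ends at the line above. The body below is a sketch of
    # how main() could drive fetch_page to measure throughput; the URL, request
    # count, and use of asyncio.gather are assumptions, not the original code.)
    url = "http://127.0.0.1:8000/generate?query=Hello"  # Assumed Serve endpoint.
    num_requests = 100
    async with aiohttp.ClientSession() as session:
        start = time.time()
        await asyncio.gather(*(fetch_page(session, url) for _ in range(num_requests)))
        elapsed = time.time() - start
    print(f"{num_requests / elapsed:.2f} queries per second")

if __name__ == "__main__":
    asyncio.run(main())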
architkulkarni / benchmark_qps.csv
Last active November 23, 2020 23:40
Throughput benchmark (queries per second)
num_replicas,Trial 1,Trial 2,Trial 3
1,0.41,0.45,0.43
10,4.16,4.17,4.09
100,24.22,24.54,24.56
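To summarize the trials, a short script like the one below could average the three runs for each replica count; this is a sketch that assumes the data is saved locally as benchmark_qps.csv in the comma-separated form shown above.

import csv
import statistics

# Print the mean queries-per-second across the three trials for each replica count.
with open("benchmark_qps.csv") as f:
    reader = csv.reader(f)
    next(reader)  # Skip the header row.
    for num_replicas, *trials in reader:
        mean_qps = statistics.mean(float(t) for t in trials)
        print(f"{num_replicas} replica(s): {mean_qps:.2f} QPS on average")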
architkulkarni / main.py
Created November 23, 2020 23:22
A FastAPI application with a Ray Serve backend.
import ray
from ray import serve
from fastapi import FastAPI
from transformers import pipeline
app = FastAPI()
serve_handle = None
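The preview cuts off after serve_handle = None. The continuation below is a sketch of how the rest of the file could wire FastAPI to a Ray Serve backend, assuming the Ray Serve 1.x client API (serve.start, create_backend, create_endpoint, get_handle); the GPT2 class and the /generate route are illustrative assumptions, not the original code.

@app.on_event("startup")  # Runs once when the FastAPI server starts.
async def startup_event():
    ray.init(address="auto")  # Connect to a running Ray cluster.
    client = serve.start()    # Start Ray Serve on the cluster.

    class GPT2:
        """Callable backend class wrapping the Hugging Face pipeline."""
        def __init__(self):
            self.nlp_model = pipeline("text-generation", model="gpt2")

        def __call__(self, request):
            # The query passed via the Serve handle is assumed to arrive as request.data.
            return self.nlp_model(request.data, max_length=50)

    # Create the backend and an endpoint, then keep a handle for in-process calls.
    client.create_backend("gpt-2", GPT2)
    client.create_endpoint("generate", backend="gpt-2")
    global serve_handle
    serve_handle = client.get_handle("generate")

@app.get("/generate")
async def generate(query: str):
    # Forward the query to the Ray Serve backend and await the result.
    return await serve_handle.remote(query)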
architkulkarni / main.py
Created November 23, 2020 23:17
A simple FastAPI application
from fastapi import FastAPI
app = FastAPI()
from transformers import pipeline # A simple API for NLP tasks.
nlp_model = pipeline("text-generation", model="gpt2") # Load the model.
# The function below handles GET requests to the URL `/generate`.
@app.get("/generate")
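# (The preview ends at the decorator above. The handler below is a sketch of a
# minimal implementation consistent with the comments; the max_length value is
# an assumption, not the original code.)
def generate(query: str):
    return nlp_model(query, max_length=50)  # Generate text conditioned on the query.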