Skip to content

Instantly share code, notes, and snippets.

@AlexsanderHamir
Created November 8, 2025 18:36
Show Gist options
  • Select an option

  • Save AlexsanderHamir/99d673bf74cdd81fd39f59fa9048f2e8 to your computer and use it in GitHub Desktop.

Select an option

Save AlexsanderHamir/99d673bf74cdd81fd39f59fa9048f2e8 to your computer and use it in GitHub Desktop.
LiteLLM Locust File (Embeddings & Responses)
import os
import uuid
from locust import HttpUser, task, between, events
# Custom metric to track LiteLLM overhead duration.
# Module-level list of overhead samples (milliseconds, as floats) parsed from
# the `x-litellm-overhead-duration-ms` response header by `on_request`.
# NOTE(review): nothing visible in this file reads or trims this list, so it
# gains one entry per matching response for as long as the process runs.
overhead_durations = []
@events.request.add_listener
def on_request(**kwargs):
    """Re-publish LiteLLM's self-reported overhead as its own Locust stats row.

    Registered as a listener on Locust's ``request`` event. When the event
    carries a response with an ``x-litellm-overhead-duration-ms`` header, the
    header value is parsed as a float, appended to ``overhead_durations`` and
    fired as a second ``request`` event of type ``"Custom"`` named
    ``"LiteLLM Overhead Duration (ms)"``.

    Args:
        **kwargs: Keyword arguments of the ``request`` event. Only
            ``response`` is read; everything else is ignored.
    """
    response = kwargs.get("response")
    # Compare against None instead of testing truthiness: a requests-style
    # response evaluates as falsy for 4xx/5xx statuses, which would silently
    # drop the overhead reported on failed requests.
    if response is None:
        return

    headers = getattr(response, "headers", None)
    if not headers:
        return

    overhead_duration = headers.get("x-litellm-overhead-duration-ms")
    if not overhead_duration:
        return

    try:
        duration_ms = float(overhead_duration)
    except (ValueError, TypeError):
        # Best effort: an unparsable header must never break the load test.
        return

    overhead_durations.append(duration_ms)

    # Report as custom metric. The synthetic event carries no ``response``,
    # so when this listener is re-entered for it, it returns at the None
    # check above instead of recursing.
    events.request.fire(
        request_type="Custom",
        name="LiteLLM Overhead Duration (ms)",
        response_time=duration_ms,
        response_length=0,
        exception=None,
        context={},
    )
class MyUser(HttpUser):
    """Simulated client that load-tests the embeddings and responses routes.

    Every task embeds a fresh UUID in its payload so that no two requests are
    identical, and appends the body of any unsuccessful response to
    ``error.txt`` in the current working directory.

    NOTE(review): the request paths have no leading slash, so presumably the
    configured host must end with ``/`` -- confirm against how the test is run.
    """

    wait_time = between(0.5, 1)  # Random wait time between requests

    def on_start(self):
        """Attach the bearer token to every request sent by this user."""
        # The key can be overridden through the environment; the default is
        # the value that used to be hard-coded here.
        self.api_key = os.environ.get("LITELLM_API_KEY", "sk-1234")
        self.client.headers.update({'Authorization': f'Bearer {self.api_key}'})

    @staticmethod
    def _log_error(response):
        """Append the response body to ``error.txt``, one entry per line."""
        with open("error.txt", "a", encoding="utf-8") as error_log:
            error_log.write(response.text + "\n")

    @staticmethod
    def _extract_output_text(data):
        """Return the first text part found in a decoded responses payload.

        Args:
            data: The decoded JSON body of a ``v1/responses`` reply.

        Returns:
            The ``text`` value of the first content part that has one;
            ``str(data["output"])`` when ``output`` exists but holds no text
            part; ``""`` when there is no ``output`` key at all.
        """
        if not isinstance(data, dict) or "output" not in data:
            return ""

        output = data["output"]
        if isinstance(output, list):
            # The first output item is not guaranteed to carry text: its
            # ``content`` may be missing or null, so scan every item rather
            # than indexing ``[0]`` blindly.
            for item in output:
                content = item.get("content") if isinstance(item, dict) else None
                if not isinstance(content, list):
                    continue
                for part in content:
                    if isinstance(part, dict) and "text" in part:
                        return part["text"]
        return str(output)

    @task
    def litellm_embeddings(self):
        """POST a large, unique embeddings request and log failures."""
        # no cache hits with this
        # The embeddings route takes plain text (a string or list of
        # strings), not chat-style message objects.
        text = f"{uuid.uuid4()} This is a test there will be no cache hits and we'll fill up the context" * 150
        payload = {
            "model": "text-embedding-3-large",
            "input": [text],
            "user": "my-new-end-user-1",
        }
        response = self.client.post("embeddings", json=payload)
        if response.status_code != 200:
            # log the errors in error.txt
            self._log_error(response)

    @task
    def litellm_responses(self):
        """POST a short, unique prompt to the responses route and print the reply."""
        prompt = (
            "System: You are a helpful assistant.\n"
            f"User: Ping {uuid.uuid4()} respond with a short acknowledgement."
        )
        payload = {
            "model": "gpt-5-codex",
            "input": prompt,  # must be a string
            "user": "my-new-end-user-1",
        }
        response = self.client.post("v1/responses", json=payload)
        if response.status_code != 200:
            self._log_error(response)
            return

        try:
            data = response.json()
        except ValueError:
            # A 200 whose body is not valid JSON is still a failure worth
            # recording; log it instead of letting the task raise.
            self._log_error(response)
            return

        print("Response:", self._extract_output_text(data))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment