Created
November 8, 2025 18:36
-
-
Save AlexsanderHamir/99d673bf74cdd81fd39f59fa9048f2e8 to your computer and use it in GitHub Desktop.
LiteLLM Locust File (Embeddings & Responses)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import uuid | |
| from locust import HttpUser, task, between, events | |
# Running record of LiteLLM overhead durations (ms) seen across all requests.
overhead_durations = []


@events.request.add_listener
def on_request(**kwargs):
    """Record LiteLLM's self-reported overhead for each completed request.

    Reads the ``x-litellm-overhead-duration-ms`` response header and re-fires
    it through ``events.request`` as a custom Locust metric so it appears in
    the stats table alongside the real request timings.
    """
    response = kwargs.get('response')
    # Failed/connection-error requests (and our own re-fired Custom events)
    # carry no usable response object — bail out early.
    if response is None or not getattr(response, 'headers', None):
        return
    overhead_duration = response.headers.get('x-litellm-overhead-duration-ms')
    if not overhead_duration:
        return
    try:
        duration_ms = float(overhead_duration)
    except (ValueError, TypeError):
        # Header present but not a number — ignore it.
        return
    overhead_durations.append(duration_ms)
    # Report as a custom metric. `exception` and `context` are required by
    # Locust's built-in stats listener; omitting them makes this fire() call
    # blow up inside the event hook and the metric is silently dropped.
    # Re-entrancy is safe: the fired event has no 'response' kwarg, so this
    # listener returns immediately when invoked for it.
    events.request.fire(
        request_type="Custom",
        name="LiteLLM Overhead Duration (ms)",
        response_time=duration_ms,
        response_length=0,
        exception=None,
        context={},
    )
class MyUser(HttpUser):
    """Load-test user that exercises LiteLLM's embeddings and responses APIs.

    Each simulated user authenticates once on start, then alternates between
    the two @task methods with a short random pause in between. Non-200
    responses are appended to ``error.txt`` for offline inspection.
    """

    wait_time = between(0.5, 1)  # Random wait time between requests, seconds

    def on_start(self):
        """Attach the bearer token to every request this user will send."""
        self.api_key = "sk-1234"
        self.client.headers.update({'Authorization': f'Bearer {self.api_key}'})

    @task
    def litellm_embeddings(self):
        """POST a large, always-unique embeddings payload.

        The uuid prefix guarantees no cache hits; the x150 repetition fills
        up the context window to stress payload handling.
        """
        # no cache hits with this
        payload = {
            "model": "text-embedding-3-large",
            "input": [{"role": "user", "content": f"{uuid.uuid4()} This is a test there will be no cache hits and we'll fill up the context" * 150}],
            "user": "my-new-end-user-1"
        }
        response = self.client.post("embeddings", json=payload)
        if response.status_code != 200:
            # log the errors in error.txt
            with open("error.txt", "a") as error_log:
                error_log.write(response.text + "\n")

    @task
    def litellm_responses(self):
        """POST a short unique prompt to the /v1/responses endpoint and
        print the model's first text output (best effort)."""
        prompt = (
            "System: You are a helpful assistant.\n"
            f"User: Ping {uuid.uuid4()} respond with a short acknowledgement."
        )
        payload = {
            "model": "gpt-5-codex",
            "input": prompt,  # must be a string
            "user": "my-new-end-user-1"
        }
        response = self.client.post("v1/responses", json=payload)
        if response.status_code == 200:
            data = response.json()
            output_text = ""
            if "output" in data:
                try:
                    output_text = data["output"][0]["content"][0]["text"]
                except (IndexError, KeyError, TypeError):
                    # TypeError covers non-subscriptable output items (e.g. a
                    # null content field or a reasoning entry); fall back to
                    # the raw structure rather than crashing the task.
                    output_text = str(data["output"])
            print("Response:", output_text)
        else:
            with open("error.txt", "a") as error_log:
                error_log.write(response.text + "\n")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment