Last active
October 21, 2025 13:28
-
-
Save gevmin94/58a7c4f5241903b3cc77b427a74e0e7e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # 1) Set API keys securely | |
| export ASYNCAI_API_KEY="..." | |
| export CARTESIA_API_KEY="..." | |
| export ELEVEN_API_KEY="..." | |
| # 3) Compare all three providers, 20 runs, 3 warmups | |
| python3 ttfb_bench.py --providers async,cartesia,eleven --n 20 --warmup 3 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
import argparse
import os
import statistics
import time
from urllib.parse import quote

import httpx
# ---------- Defaults (override via CLI) ----------
# Text synthesized on every run; default value of the --text option.
DEFAULT_TEXT = "Async is designed for low-latency applications, delivering text-to-speech responses in under 200 ms."
# Per-provider model and voice defaults; each one is the default of the
# matching --<provider>-model-id / --<provider>-voice-id option in parse_args().
DEFAULT_ASYNC_MODEL = "asyncflow_multilingual_v1.0"
DEFAULT_ASYNC_VOICE_ID = "e0f39dc4-f691-4e78-bba5-5c636692cc04"
DEFAULT_CARTESIA_MODEL = "sonic-turbo"
DEFAULT_CARTESIA_VOICE_ID = "694f9389-aac1-45b6-b726-9d9369183238"
DEFAULT_ELEVEN_MODEL = "eleven_flash_v2_5"
DEFAULT_ELEVEN_VOICE_ID = "JBFqnCBsd6RMkjVDRZzb"  # replace if you want a different voice
# Sample rate requested for the raw PCM output built in make_request_params().
SAMPLE_RATE = 16000
# Timeout and connection-pool settings handed to the httpx.Client that
# run_benchmark_for_provider() creates for each provider.
TIMEOUT = httpx.Timeout(connect=5.0, read=60.0, write=30.0, pool=5.0)
LIMITS = httpx.Limits(max_connections=20, max_keepalive_connections=10, keepalive_expiry=30.0)
| # ---------- Providers ---------- | |
def _require_api_key(env_name):
    """Return the API key stored in *env_name*, or exit if it is unset/empty."""
    api_key = os.environ.get(env_name)
    if not api_key:
        raise SystemExit(f"Missing {env_name} in environment")
    return api_key


def make_request_params(provider, args):
    """Build the streaming TTS request for one provider.

    Args:
        provider: Provider key: "async", "cartesia" or "eleven".
        args: Parsed CLI namespace. Reads ``text`` plus the selected
            provider's ``<provider>_model_id`` / ``<provider>_voice_id``.

    Returns:
        A ``(method, url, headers, json_payload)`` tuple.

    Raises:
        SystemExit: If *provider* is unknown, or the provider's API key
            environment variable is missing or empty.
    """
    text = args.text
    # Headers sent to every provider; the auth headers are prepended below.
    common_headers = {
        "Content-Type": "application/json",
        "Accept": "audio/*",
        "User-Agent": "ttfb-bench/1.0",
    }

    if provider == "async":
        api_key = _require_api_key("ASYNCAI_API_KEY")
        url = "https://api.async.ai/text_to_speech/streaming"
        headers = {"x-api-key": api_key, "version": "v1", **common_headers}
        payload = {
            "model_id": args.async_model_id,
            "transcript": text,
            "voice": {"mode": "id", "id": args.async_voice_id},
            "output_format": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": SAMPLE_RATE},
        }
        return "POST", url, headers, payload

    if provider == "cartesia":
        api_key = _require_api_key("CARTESIA_API_KEY")
        url = "https://api.cartesia.ai/tts/bytes"
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Cartesia-Version": "2025-04-16",
            **common_headers,
        }
        payload = {
            "model_id": args.cartesia_model_id,
            "transcript": text,
            "voice": {"mode": "id", "id": args.cartesia_voice_id},
            "output_format": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": SAMPLE_RATE},
        }
        return "POST", url, headers, payload

    if provider == "eleven":
        api_key = _require_api_key("ELEVEN_API_KEY")
        # The streaming endpoint carries the voice in the URL path, so the id
        # is percent-encoded; the model is selected via the JSON body.
        voice_id = quote(args.eleven_voice_id, safe="")
        # Derive the PCM format from SAMPLE_RATE so all three providers are
        # asked for the same output rate.
        url = (
            f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}/stream"
            f"?output_format=pcm_{SAMPLE_RATE}"
        )
        headers = {"xi-api-key": api_key, **common_headers}
        payload = {
            "model_id": args.eleven_model_id,  # e.g., eleven_flash_v2_5
            "text": text,
        }
        return "POST", url, headers, payload

    raise SystemExit(f"Unknown provider: {provider}")
def one_run(client: httpx.Client, provider: str, args):
    """Send one streaming TTS request and time its phases.

    Args:
        client: Open httpx client used to issue the request.
        provider: Provider key understood by make_request_params().
        args: Parsed CLI namespace forwarded to make_request_params().

    Returns:
        A dict with the keys ``status``, ``http_version``, ``request_id``,
        ``time_to_headers``, ``ttfb``, ``first_chunk_size``, ``total_bytes``,
        ``total_time`` and ``throughput_bps``. All timings are
        ``time.perf_counter()`` differences measured from just before the
        request is issued. ``ttfb`` and ``throughput_bps`` are ``None`` when
        the response carried no body bytes.

    Raises:
        httpx.HTTPStatusError: If the response status is 4xx/5xx.
        SystemExit: Propagated from make_request_params().
    """
    # Request construction happens before the clock starts, so it is not
    # counted in any of the timings.
    method, url, headers, payload = make_request_params(provider, args)

    start = time.perf_counter()
    with client.stream(method, url, headers=headers, json=payload) as resp:
        headers_received = time.perf_counter()
        status = resp.status_code
        http_ver = resp.http_version
        # Providers name their request-id header differently; take the first present.
        req_id = (
            resp.headers.get("x-request-id")
            or resp.headers.get("x-amzn-requestid")
            or resp.headers.get("x-requestid")
        )
        # Raise on non-2xx to make failures visible
        resp.raise_for_status()

        first_chunk_time = None
        first_chunk_size = 0
        total_bytes = 0
        for chunk in resp.iter_bytes():
            if not chunk:
                continue
            total_bytes += len(chunk)
            if first_chunk_time is None:
                first_chunk_time = time.perf_counter()
                first_chunk_size = len(chunk)
        end = time.perf_counter()

    elapsed = end - start
    return {
        "status": status,
        "http_version": http_ver,
        "request_id": req_id,
        "time_to_headers": headers_received - start,
        # Compare against None explicitly: a timestamp is a float and must not
        # be judged by truthiness.
        "ttfb": (first_chunk_time - start) if first_chunk_time is not None else None,
        "first_chunk_size": first_chunk_size,
        "total_bytes": total_bytes,
        "total_time": elapsed,
        "throughput_bps": (total_bytes / elapsed) if total_bytes and elapsed > 0 else None,
    }
def _format_metric(value, spec, suffix=""):
    """Format *value* with format spec *spec* plus *suffix*, or "n/a" for None."""
    if value is None:
        return "n/a"
    return f"{value:{spec}}{suffix}"


def run_benchmark_for_provider(provider: str, args):
    """Run the warm-up and measured requests for one provider.

    A single HTTP/2-enabled httpx client is shared by all runs of the
    provider. Warm-up results are discarded. Each successful measured run is
    printed on one line and collected.

    Args:
        provider: Provider key understood by make_request_params().
        args: Parsed CLI namespace; reads ``warmup`` and ``n`` and is
            forwarded to one_run().

    Returns:
        The list of result dicts from one_run() for the measured runs that
        completed without raising an Exception.
    """
    results = []
    with httpx.Client(http2=True, timeout=TIMEOUT, follow_redirects=True, limits=LIMITS) as client:
        # Warm-ups
        for _ in range(args.warmup):
            try:
                one_run(client, provider, args)
            except Exception as e:
                print(f"[{provider}][warmup] error: {e}")

        # Measured runs
        for i in range(args.n):
            try:
                r = one_run(client, provider, args)
            except Exception as e:
                print(f"[{provider}][run {i+1}] error: {e}")
                continue

            # ttfb and throughput are None when the response had no body;
            # formatting them directly would raise TypeError outside the
            # try block above and abort the whole benchmark.
            throughput = r["throughput_bps"]
            throughput_kib = None if throughput is None else throughput / 1024
            print(
                f"[{provider}][{i+1:02}] {r['status']} {r['http_version']} "
                f"headers={r['time_to_headers']:.3f}s "
                f"ttfb={_format_metric(r['ttfb'], '.3f', 's')} "
                f"first={r['first_chunk_size']}B "
                f"total={r['total_bytes']}B "
                f"time={r['total_time']:.3f}s "
                f"thrpt={_format_metric(throughput_kib, '.1f', ' KiB/s')} "
                f"reqid={r['request_id'] or '-'}"
            )
            results.append(r)
    return results
def summarize(values):
    """Compute summary statistics for a list of numbers.

    Args:
        values: List of numeric samples (may be empty).

    Returns:
        A dict with ``count``, ``min``, ``median``, ``avg``, ``max`` and
        ``p95``. Every statistic is ``None`` for empty input (``count`` is 0),
        and ``p95`` is ``None`` when there is only a single sample.
    """
    if not values:
        return {"count": 0, "min": None, "median": None, "avg": None, "p95": None, "max": None}

    count = len(values)
    p95 = None
    if count >= 20:
        # Enough samples for the default ("exclusive") 20-quantile estimate.
        p95 = statistics.quantiles(values, n=20)[18]
    elif count >= 2:
        # Small sample: interpolate at position 0.95 * (count - 1) between the
        # two neighbouring order statistics. Truncating that position instead
        # would, for two samples, report the minimum as p95 (below the median).
        # For small N the figure is indicative, not exact.
        p95 = statistics.quantiles(values, n=20, method="inclusive")[18]

    return {
        "count": count,
        "min": min(values),
        "median": statistics.median(values),
        "avg": sum(values) / count,
        "max": max(values),
        "p95": p95,
    }
def print_summary(provider, results):
    """Print summary statistics for one provider's measured runs.

    Args:
        provider: Provider key; shown upper-cased in the heading.
        results: List of result dicts as produced by one_run(). Entries whose
            metric is missing or ``None`` are left out of that metric's
            statistics.
    """
    tth = [r["time_to_headers"] for r in results if r.get("time_to_headers") is not None]
    ttfb = [r["ttfb"] for r in results if r.get("ttfb") is not None]
    ttot = [r["total_time"] for r in results if r.get("total_time") is not None]
    thr = [r["throughput_bps"] / 1024 for r in results if r.get("throughput_bps") is not None]  # KiB/s

    print(f"\n=== {provider.upper()} Summary ===")

    def fmt(stats):
        # Round only the float statistics; the integer sample count is kept
        # as-is so it prints as 10 rather than '10.000'. None passes through.
        return {k: (f"{v:.3f}" if isinstance(v, float) else v) for k, v in stats.items()}

    print("time_to_headers (s):", fmt(summarize(tth)))
    print("ttfb (s): ", fmt(summarize(ttfb)))
    print("total_time (s): ", fmt(summarize(ttot)))
    print("throughput (KiB/s): ", fmt(summarize(thr)))
def parse_args():
    """Parse the benchmark's command-line options and return the namespace."""
    parser = argparse.ArgumentParser(
        description="TTFB streaming benchmark for async.ai, Cartesia, and ElevenLabs"
    )
    parser.add_argument(
        "--providers",
        type=str,
        default="async",
        help="Comma-separated providers: async,cartesia,eleven (default: async)",
    )
    parser.add_argument(
        "--n", type=int, default=10,
        help="Number of measured runs per provider (default: 10)",
    )
    parser.add_argument(
        "--warmup", type=int, default=1,
        help="Number of warm-up runs per provider (default: 1)",
    )
    parser.add_argument("--text", type=str, default=DEFAULT_TEXT, help="Prompt/transcript text")

    # Model and voice overrides, registered in provider order:
    # Async, Cartesia, ElevenLabs.
    provider_options = (
        ("--async-model-id", DEFAULT_ASYNC_MODEL),
        ("--async-voice-id", DEFAULT_ASYNC_VOICE_ID),
        ("--cartesia-model-id", DEFAULT_CARTESIA_MODEL),
        ("--cartesia-voice-id", DEFAULT_CARTESIA_VOICE_ID),
        ("--eleven-model-id", DEFAULT_ELEVEN_MODEL),  # e.g., eleven_flash_v2_5
        ("--eleven-voice-id", DEFAULT_ELEVEN_VOICE_ID),
    )
    for flag, default in provider_options:
        parser.add_argument(flag, type=str, default=default)

    return parser.parse_args()
def main():
    """Benchmark every provider requested on the command line.

    Parses the CLI options, validates all requested providers up front, then
    runs the benchmark and prints a summary for each provider in the order
    given.

    Raises:
        SystemExit: If no provider was given, a provider name is unknown, or
            a requested provider's API key is missing from the environment.
    """
    args = parse_args()
    providers = [p.strip().lower() for p in args.providers.split(",") if p.strip()]
    if not providers:
        raise SystemExit("No providers given; use --providers async,cartesia,eleven")

    # Fail fast: building the request parameters checks both the provider name
    # and its API key, so a typo or missing key for a later provider is
    # reported before any benchmarking time is spent on the earlier ones.
    for prov in providers:
        make_request_params(prov, args)

    print(f"Providers: {providers} | runs={args.n} | warmup={args.warmup}")
    for prov in providers:
        results = run_benchmark_for_provider(prov, args)
        print_summary(prov, results)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment