Skip to content

Instantly share code, notes, and snippets.

@gevmin94
Last active October 21, 2025 13:28
Show Gist options
  • Select an option

  • Save gevmin94/58a7c4f5241903b3cc77b427a74e0e7e to your computer and use it in GitHub Desktop.

Select an option

Save gevmin94/58a7c4f5241903b3cc77b427a74e0e7e to your computer and use it in GitHub Desktop.
# 1) Set API keys securely
export ASYNCAI_API_KEY="..."
export CARTESIA_API_KEY="..."
export ELEVEN_API_KEY="..."
# 2) Compare all three providers, 20 runs, 3 warmups
python3 ttfb_bench.py --providers async,cartesia,eleven --n 20 --warmup 3
#!/usr/bin/env python3
import os, time, argparse, statistics
import httpx
# ---------- Defaults (override via CLI) ----------
# The text and the per-provider model/voice ids below are the defaults of the
# matching command-line flags declared in parse_args(). SAMPLE_RATE, TIMEOUT and
# LIMITS have no CLI flag; edit them here.

# Sentence synthesized on every run unless --text is given.
DEFAULT_TEXT = "Async is designed for low-latency applications, delivering text-to-speech responses in under 200 ms."
# async.ai model and voice.
DEFAULT_ASYNC_MODEL = "asyncflow_multilingual_v1.0"
DEFAULT_ASYNC_VOICE_ID = "e0f39dc4-f691-4e78-bba5-5c636692cc04"
# Cartesia model and voice.
DEFAULT_CARTESIA_MODEL = "sonic-turbo"
DEFAULT_CARTESIA_VOICE_ID = "694f9389-aac1-45b6-b726-9d9369183238"
# ElevenLabs model and voice (the voice id becomes part of the request URL path).
DEFAULT_ELEVEN_MODEL = "eleven_flash_v2_5"
DEFAULT_ELEVEN_VOICE_ID = "JBFqnCBsd6RMkjVDRZzb" # replace if you want a different voice
# Sample rate sent in the "output_format" payload for async.ai and Cartesia.
# NOTE: the ElevenLabs URL hard-codes "pcm_16000" instead of reading this
# constant, so change both together to keep the providers comparable.
SAMPLE_RATE = 16000
# Per-phase HTTP timeouts handed to httpx.Client (seconds, per the httpx docs).
TIMEOUT = httpx.Timeout(connect=5.0, read=60.0, write=30.0, pool=5.0)
# Connection-pool limits handed to httpx.Client; one client is reused for all
# warm-up and measured runs of a provider.
LIMITS = httpx.Limits(max_connections=20, max_keepalive_connections=10, keepalive_expiry=30.0)
# ---------- Providers ----------
def make_request_params(provider, args):
    """Return (method, url, headers, json_payload) for the provider.

    Args:
        provider: One of "async", "cartesia" or "eleven".
        args: Parsed CLI namespace. ``args.text`` is always read; the
            ``<provider>_model_id`` / ``<provider>_voice_id`` attributes are
            read only for the selected provider.

    Returns:
        A 4-tuple ``(method, url, headers, payload)`` ready to pass to an
        httpx streaming request; ``payload`` is sent as the JSON body.

    Raises:
        SystemExit: If the provider name is unknown or the provider's API key
            environment variable is unset or empty.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    text = args.text

    if provider == "async":
        api_key = _require_api_key("ASYNCAI_API_KEY")
        url = "https://api.async.ai/text_to_speech/streaming"
        headers = {
            "x-api-key": api_key,
            "version": "v1",
            "Content-Type": "application/json",
            "Accept": "audio/*",
            "User-Agent": "ttfb-bench/1.0",
        }
        payload = {
            "model_id": args.async_model_id,
            "transcript": text,
            "voice": {"mode": "id", "id": args.async_voice_id},
            "output_format": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": SAMPLE_RATE},
        }
        return "POST", url, headers, payload

    if provider == "cartesia":
        api_key = _require_api_key("CARTESIA_API_KEY")
        url = "https://api.cartesia.ai/tts/bytes"
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Cartesia-Version": "2025-04-16",
            "Content-Type": "application/json",
            "Accept": "audio/*",
            "User-Agent": "ttfb-bench/1.0",
        }
        payload = {
            "model_id": args.cartesia_model_id,
            "transcript": text,
            "voice": {"mode": "id", "id": args.cartesia_voice_id},
            "output_format": {"container": "raw", "encoding": "pcm_s16le", "sample_rate": SAMPLE_RATE},
        }
        return "POST", url, headers, payload

    if provider == "eleven":
        api_key = _require_api_key("ELEVEN_API_KEY")
        # streaming endpoint includes voice in path; set model via JSON.
        # The voice id comes straight from the command line, so percent-encode
        # it: a stray "/", "?" or "#" would otherwise change the endpoint.
        voice_id = quote(str(args.eleven_voice_id), safe="")
        url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}/stream?output_format=pcm_16000"
        headers = {
            "xi-api-key": api_key,
            "Content-Type": "application/json",
            "Accept": "audio/*",
            "User-Agent": "ttfb-bench/1.0",
        }
        payload = {
            "model_id": args.eleven_model_id,  # e.g., eleven_flash_v2_5
            "text": text,
        }
        return "POST", url, headers, payload

    raise SystemExit(f"Unknown provider: {provider}")


def _require_api_key(env_name):
    """Return the API key held in environment variable *env_name*, or exit."""
    api_key = os.environ.get(env_name)
    if not api_key:
        raise SystemExit(f"Missing {env_name} in environment")
    return api_key
def one_run(client: httpx.Client, provider: str, args) -> dict:
    """Send one streaming TTS request and time its phases.

    All durations are measured with ``time.perf_counter()`` from the moment
    just before the request is issued.

    Args:
        client: The httpx client used to issue the request.
        provider: Provider name understood by ``make_request_params``.
        args: Parsed CLI namespace forwarded to ``make_request_params``.

    Returns:
        A dict with these keys:
            status, http_version, request_id: response metadata
                (``request_id`` is the first present of ``x-request-id``,
                ``x-amzn-requestid``, ``x-requestid``, else ``None``).
            time_to_headers: seconds until the response headers were available.
            ttfb: seconds until the first non-empty body chunk, or ``None``
                when the body contained no data.
            first_chunk_size: size in bytes of that first chunk (0 if none).
            total_bytes: total body bytes received.
            total_time: seconds until the body was fully consumed.
            throughput_bps: ``total_bytes / total_time`` (bytes per second),
                or ``None`` when nothing was received.

    Raises:
        httpx.HTTPStatusError: On a non-2xx response (via ``raise_for_status``).
        SystemExit: Propagated from ``make_request_params``.
    """
    # Build the request before starting the clock so setup cost is not timed.
    method, url, headers, payload = make_request_params(provider, args)

    first_chunk_time = None
    first_chunk_size = 0
    total_bytes = 0

    start = time.perf_counter()
    with client.stream(method, url, headers=headers, json=payload) as resp:
        headers_received = time.perf_counter()
        status = resp.status_code
        http_ver = resp.http_version
        req_id = (
            resp.headers.get("x-request-id")
            or resp.headers.get("x-amzn-requestid")
            or resp.headers.get("x-requestid")
        )
        # Raise on non-2xx to make failures visible
        resp.raise_for_status()

        for chunk in resp.iter_bytes():
            if not chunk:
                continue
            total_bytes += len(chunk)
            if first_chunk_time is None:
                first_chunk_time = time.perf_counter()
                first_chunk_size = len(chunk)
        # Stop the clock as soon as the body is exhausted.
        end = time.perf_counter()

    elapsed = end - start
    return {
        "status": status,
        "http_version": http_ver,
        "request_id": req_id,
        "time_to_headers": headers_received - start,
        # Compare against None explicitly: a timestamp is a float, and
        # truthiness is the wrong test for "was a chunk ever seen".
        "ttfb": (first_chunk_time - start) if first_chunk_time is not None else None,
        "first_chunk_size": first_chunk_size,
        "total_bytes": total_bytes,
        "total_time": elapsed,
        "throughput_bps": (total_bytes / elapsed) if total_bytes and elapsed > 0 else None,
    }
def run_benchmark_for_provider(provider: str, args):
    """Run warm-up and measured requests for one provider.

    A single ``httpx.Client`` is shared by every run of the provider.
    ``args.warmup`` runs are executed first and their results discarded, then
    ``args.n`` measured runs are executed; each successful one is logged on
    its own line and collected.

    Args:
        provider: Provider name understood by ``make_request_params``.
        args: Parsed CLI namespace (``warmup``, ``n`` plus provider settings).

    Returns:
        List of result dicts from ``one_run`` for the measured runs that
        succeeded. Failed runs are reported on stdout and skipped.
    """

    def fmt_seconds(value):
        # one_run reports None for ttfb when the body was empty; formatting
        # None with ":.3f" would raise TypeError and abort the whole benchmark.
        return "n/a" if value is None else f"{value:.3f}s"

    def fmt_throughput(bps):
        return "n/a" if bps is None else f"{(bps / 1024):.1f} KiB/s"

    results = []
    with httpx.Client(http2=True, timeout=TIMEOUT, follow_redirects=True, limits=LIMITS) as client:
        # Warm-ups
        for _ in range(args.warmup):
            try:
                one_run(client, provider, args)
            except Exception as e:  # best effort: a failed warm-up must not stop the benchmark
                print(f"[{provider}][warmup] error: {e}")

        # Measured runs
        for i in range(args.n):
            try:
                r = one_run(client, provider, args)
            except Exception as e:  # report the failed run and keep measuring
                print(f"[{provider}][run {i+1}] error: {e}")
                continue
            print(
                f"[{provider}][{i+1:02}] {r['status']} {r['http_version']} "
                f"headers={fmt_seconds(r['time_to_headers'])} "
                f"ttfb={fmt_seconds(r['ttfb'])} "
                f"first={r['first_chunk_size']}B "
                f"total={r['total_bytes']}B "
                f"time={fmt_seconds(r['total_time'])} "
                f"thrpt={fmt_throughput(r['throughput_bps'])} "
                f"reqid={r['request_id'] or '-'}"
            )
            results.append(r)
    return results
def summarize(values):
    """Return count/min/median/avg/max/p95 statistics for a list of numbers.

    Args:
        values: Sequence of numbers (may be empty).

    Returns:
        Dict with keys ``count``, ``min``, ``median``, ``avg``, ``max`` and
        ``p95``. For empty input every statistic is ``None`` and ``count`` is
        0. ``p95`` is ``None`` for a single sample; for 2-19 samples it is the
        nearest-rank 95th percentile; for 20 or more samples it is the 19th of
        the 20-quantile cut points from ``statistics.quantiles``.
    """
    if not values:
        return {"count": 0, "min": None, "median": None, "avg": None, "p95": None, "max": None}

    count = len(values)
    out = {
        "count": count,
        "min": min(values),
        "median": statistics.median(values),
        "avg": sum(values) / count,
        "max": max(values),
        "p95": None,
    }

    if count >= 20:
        out["p95"] = statistics.quantiles(values, n=20)[18]
    elif count >= 2:
        # Nearest-rank percentile: rank = ceil(0.95 * count), done in integer
        # arithmetic to avoid float rounding. A floor-based index would
        # under-report badly for small samples (the minimum for 2 values, the
        # median for 3).
        rank = (95 * count + 99) // 100
        out["p95"] = sorted(values)[rank - 1]
    return out
def print_summary(provider, results):
    """Print summary statistics for one provider's measured runs.

    For each of time-to-headers, TTFB, total time and throughput, the values
    that are not ``None`` are collected from ``results``, passed through
    ``summarize`` and printed as one line.

    Args:
        provider: Provider name, shown upper-cased in the heading.
        results: List of result dicts as produced by ``one_run``.
    """

    def collect(key):
        return [r[key] for r in results if r.get(key) is not None]

    def fmt(stats):
        # Render measurements with three decimals but leave the integer sample
        # count alone; formatting it too would print e.g. '10.000'.
        return {
            k: (f"{v:.3f}" if k != "count" and isinstance(v, (int, float)) else v)
            for k, v in stats.items()
        }

    tth = collect("time_to_headers")
    ttfb = collect("ttfb")
    ttot = collect("total_time")
    thr = [bps / 1024 for bps in collect("throughput_bps")]  # KiB/s

    print(f"\n=== {provider.upper()} Summary ===")
    print("time_to_headers (s):", fmt(summarize(tth)))
    print("ttfb (s): ", fmt(summarize(ttfb)))
    print("total_time (s): ", fmt(summarize(ttot)))
    print("throughput (KiB/s): ", fmt(summarize(thr)))
def parse_args(argv=None):
    """Parse the benchmark's command-line options.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads ``sys.argv[1:]``, so existing ``parse_args()``
            callers are unaffected.

    Returns:
        ``argparse.Namespace`` with ``providers``, ``n``, ``warmup``, ``text``
        and the per-provider ``*_model_id`` / ``*_voice_id`` attributes.
    """

    def non_negative_int(raw):
        # A negative count would silently yield an empty loop; reject it with
        # a normal argparse usage error instead.
        try:
            value = int(raw)
        except ValueError:
            raise argparse.ArgumentTypeError(f"invalid int value: {raw!r}") from None
        if value < 0:
            raise argparse.ArgumentTypeError(f"must be >= 0, got {value}")
        return value

    p = argparse.ArgumentParser(description="TTFB streaming benchmark for async.ai, Cartesia, and ElevenLabs")
    p.add_argument("--providers", type=str, default="async",
                   help="Comma-separated providers: async,cartesia,eleven (default: async)")
    p.add_argument("--n", type=non_negative_int, default=10,
                   help="Number of measured runs per provider (default: 10)")
    p.add_argument("--warmup", type=non_negative_int, default=1,
                   help="Number of warm-up runs per provider (default: 1)")
    p.add_argument("--text", type=str, default=DEFAULT_TEXT, help="Prompt/transcript text")
    # Async
    p.add_argument("--async-model-id", type=str, default=DEFAULT_ASYNC_MODEL)
    p.add_argument("--async-voice-id", type=str, default=DEFAULT_ASYNC_VOICE_ID)
    # Cartesia
    p.add_argument("--cartesia-model-id", type=str, default=DEFAULT_CARTESIA_MODEL)
    p.add_argument("--cartesia-voice-id", type=str, default=DEFAULT_CARTESIA_VOICE_ID)
    # ElevenLabs
    p.add_argument("--eleven-model-id", type=str, default=DEFAULT_ELEVEN_MODEL)  # e.g., eleven_flash_v2_5
    p.add_argument("--eleven-voice-id", type=str, default=DEFAULT_ELEVEN_VOICE_ID)
    return p.parse_args(argv)
def main():
    """Entry point: benchmark each requested provider and print its summary.

    Raises:
        SystemExit: If no provider was given, a provider name is unknown, or
            a provider's API key is missing. All of these are detected before
            the first request is sent.
    """
    args = parse_args()
    providers = [p.strip().lower() for p in args.providers.split(",") if p.strip()]
    if not providers:
        raise SystemExit("No providers specified")

    # Fail fast: building the request parameters checks both the provider name
    # and its API key, so a typo or missing key in a later provider is caught
    # before any (billed) requests are made for the earlier ones.
    for prov in providers:
        make_request_params(prov, args)

    print(f"Providers: {providers} | runs={args.n} | warmup={args.warmup}")
    for prov in providers:
        results = run_benchmark_for_provider(prov, args)
        print_summary(prov, results)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment