Skip to content

Instantly share code, notes, and snippets.

@kwindla
Created April 19, 2024 19:41
Show Gist options
  • Save kwindla/797b9a66dbde115638c406749c49eced to your computer and use it in GitHub Desktop.
Save kwindla/797b9a66dbde115638c406749c49eced to your computer and use it in GitHub Desktop.
Groq Llama-3 Time To First Byte
import os
import json
import time
import statistics
from groq import Groq
# Benchmark configuration: how many streaming requests to time.
NUM_INFERENCES = 10
# The API key is taken from the environment (None when the variable is unset).
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# One Groq client is shared by every request in the benchmark.
client = Groq(api_key=GROQ_API_KEY)
# Collected time-to-first-byte samples, one entry per timed request.
ttfb_results = []
# The prompt is the same for every run, so build it once outside the loop
# (replace with your own input).
input_prompt = "Tell me about Hamlet's state of mind when he said \"to be or not to be?\""
for _ in range(NUM_INFERENCES):
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() follows the wall clock and can jump if the system
    # time is adjusted while a request is in flight.
    start_time = time.perf_counter()
    end_time = None
    # Send the streaming request to the Groq model.
    with client.chat.completions.with_streaming_response.create(
        messages=[
            {
                "role": "user",
                "content": input_prompt,
            }
        ],
        model="llama3-70b-8192",
        stream=True,
    ) as response:
        for line in response.iter_lines():
            # Take the timestamp the moment the first line arrives, before
            # printing it and before the stream is closed, so neither console
            # I/O nor connection teardown is counted as latency.
            end_time = time.perf_counter()
            print(line)
            break
    if end_time is None:
        # The stream ended without yielding a line, so there is no first byte
        # to time; skip the sample rather than record a misleading value.
        print("No data received from the stream; skipping this sample")
        continue
    # Calculate the TTFB and add it to the results list.
    ttfb = end_time - start_time
    ttfb_results.append(ttfb * 1000)  # Convert to milliseconds
# Calculate and display the statistical overview of the collected samples
# (ttfb_results holds one TTFB value in milliseconds per timed request).
mean_ttfb = statistics.mean(ttfb_results)
median_ttfb = statistics.median(ttfb_results)
# statistics.stdev() raises StatisticsError with fewer than two data points,
# so a single-sample run reports the deviation as unavailable instead of
# crashing after the measurements have already been taken.
stddev_ttfb = statistics.stdev(ttfb_results) if len(ttfb_results) > 1 else None
min_ttfb = min(ttfb_results)
max_ttfb = max(ttfb_results)
print(f"Mean TTFB: {mean_ttfb:.2f} ms")
print(f"Median TTFB: {median_ttfb:.2f} ms")
if stddev_ttfb is None:
    print("Standard Deviation: n/a (need at least 2 samples)")
else:
    print(f"Standard Deviation: {stddev_ttfb:.2f} ms")
print(f"Min TTFB: {min_ttfb:.2f} ms")
print(f"Max TTFB: {max_ttfb:.2f} ms")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment