# Gist for benchmarking GPT generation performance with the same input (43 tokens)
import aiohttp
import json
import asyncio
import datetime
import matplotlib.pyplot as plt
# Prompt material for the benchmark. The literal must be closed before any
# other statement, otherwise the rest of the module is swallowed by the string.
text = """
Planning a project is one thing, but making sure deadlines are met and all tasks are completed is a whole other. Falling behind is easy when an entire sequence of events is dependent on the previous being successfully finished.
"""
# One entry per non-empty line of `text`; blank lines produced by the leading
# and trailing newlines of the literal are dropped.
sentenses = [s for s in text.split('\n') if s]
# Send one generation request for `txt` with a limit of `gen_toks` generated
# tokens and return the 'completion' field of the JSON reply.
# NOTE(review): this block is damaged as it stands -- indentation is gone, the
# `base` dict literal is never closed, and the inner `async with` has lost its
# expression (the actual HTTP call and its target). These cannot be recovered
# from this file; restore them from the original gist before running.
async def req(txt, gen_toks):
# Request payload: the prompt text, the generation limit, and the
# top_p / top_k / temperature settings.
base = {
"text": txt,
"generate_tokens_limit": gen_toks,
"top_p": 0.7,
"top_k": 0,
"temperature": 1.0
# NOTE(review): the closing brace of `base` is missing at this point.
# A fresh aiohttp client session is opened for every call.
async with aiohttp.ClientSession() as session:
# NOTE(review): request expression missing here; presumably a call on
# `session` that sends `base` to the service -- confirm method and URL.
async with
) as response:
# Read the body as text, then decode it as JSON.
resp = await response.text()
resp = json.loads(resp)
# Only the 'completion' entry of the reply is returned to the caller.
c = resp['completion']
return c
async def start():
    """Benchmark generation time against the generated-token limit and plot it.

    For every limit in ``range(100, 1900, 100)`` this launches
    ``REPEATS_PER_LENGTH`` concurrent ``req`` calls with the first prompt
    sentence, measures the wall-clock time of the whole batch, and stores
    that time divided by the repeat count. Both series are printed and the
    curve is saved to ``res.png``.

    Returns:
        None. Results are reported through stdout and the saved figure.
    """
    # NOTE(review): REPEATS_PER_LENGTH is not defined in the visible part of
    # this file; it has to exist at module level before this coroutine runs.
    inp = sentenses[0]
    # res[0] collects the token limits, res[1] the matching averaged timings.
    res = [[], []]
    for gen_toks in range(100, 1900, 100):
        t = datetime.datetime.now()
        # The repeats run concurrently, so `dur` is the batch wall-clock time.
        await asyncio.gather(*[req(inp, gen_toks) for _ in range(REPEATS_PER_LENGTH)])
        dur = (datetime.datetime.now() - t).total_seconds()
        res[0].append(gen_toks)
        res[1].append(dur * 1.0 / REPEATS_PER_LENGTH)
    print(
        " ".join([str(e) for e in res[0]]),
        " ".join([str(e) for e in res[1]]),
    )
    plt.plot(res[0], res[1])
    # Labels describe what is actually plotted: the x values are the token
    # limits and the y values are batch time divided by the repeat count
    # (the input text is the same for every request).
    plt.xlabel('generate_tokens_limit')
    plt.ylabel('batch time / repeats (s)')
    plt.title('Generation time vs. generated tokens limit')
    plt.savefig('res.png', dpi=600)
