Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Gist for benchmarking GPT generation performance with the same input (43 tokens)
import asyncio
import datetime
import json
import time

import aiohttp
import matplotlib.pyplot as plt
# Raw benchmark prompt; the surrounding newlines come from the triple-quoted
# literal and are stripped out by the split below.
text = """
Planning a project is one thing, but making sure deadlines are met and all tasks are completed is a whole other. Falling behind is easy when an entire sequence of events is dependent on the previous being successfully finished.
"""
# Non-empty lines of the prompt text (filter(None, ...) drops empty strings).
sentenses = list(filter(None, text.split('\n')))
async def req(txt, gen_toks):
    """Request one completion from the local generation server.

    Args:
        txt: Prompt text, sent as the ``text`` field of the JSON payload.
        gen_toks: Value sent as ``generate_tokens_limit``.

    Returns:
        The ``completion`` field of the server's JSON reply.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP
            error status (400 or above).
        KeyError: If the reply has no ``completion`` field.
    """
    payload = {
        "text": txt,
        "generate_tokens_limit": gen_toks,
        "top_p": 0.7,
        "top_k": 0,
        "temperature": 1.0,
    }
    async with aiohttp.ClientSession() as session:
        async with session.post(
            'http://127.0.0.1:8080/generate/',
            json=payload,
        ) as response:
            # Fail with the real HTTP error instead of a misleading
            # JSONDecodeError/KeyError when the server rejects the request.
            response.raise_for_status()
            # Read as text and parse manually so the reply's Content-Type
            # header does not matter.
            body = await response.text()
    return json.loads(body)['completion']
# Number of identical requests issued concurrently for every token limit;
# the measured batch time is divided by this value.
REPEATS_PER_LENGTH = 3
async def start():
    """Benchmark generation time against the requested number of tokens.

    For every ``generate_tokens_limit`` in 100, 200, ..., 1800 this issues
    ``REPEATS_PER_LENGTH`` concurrent ``req`` calls with the same prompt
    (the first entry of ``sentenses``), times the whole batch and records
    the batch time divided by ``REPEATS_PER_LENGTH``.  The two series are
    printed to stdout and plotted to ``res.png``.
    """
    prompt = sentenses[0]
    token_limits = []
    avg_durations = []

    for gen_toks in range(100, 1900, 100):
        # perf_counter is monotonic, so system clock adjustments during a
        # long run cannot distort (or negate) the measured interval.
        started = time.perf_counter()
        await asyncio.gather(
            *(req(prompt, gen_toks) for _ in range(REPEATS_PER_LENGTH))
        )
        elapsed = time.perf_counter() - started

        token_limits.append(gen_toks)
        # The requests run concurrently, so this is the batch wall time
        # amortised per request, not the latency of a single request.
        avg_durations.append(elapsed / REPEATS_PER_LENGTH)

    print('RES:\n',
          " ".join(str(e) for e in token_limits),
          '\n',
          " ".join(str(e) for e in avg_durations),
          )

    # Labels describe what is actually plotted: the x axis is the
    # generate_tokens_limit sent to the server, the y axis the averaged time.
    plt.plot(token_limits, avg_durations)
    plt.xlabel('generate_tokens_limit')
    plt.ylabel('batch time / concurrent requests, s')
    plt.title('Generation time vs. number of generated tokens')
    plt.savefig('res.png', dpi=600)
# Script entry point: run the benchmark coroutine to completion.
asyncio.run(start())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment