Skip to content

Instantly share code, notes, and snippets.

@ivictbor
Created February 16, 2022 13:59
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save ivictbor/5ee3419eef68bce14b1cb090e9d94787 to your computer and use it in GitHub Desktop.
Gist for benchmarking GPT generation performance with the same input (43 tokens)
import asyncio
import datetime
import json
import time

import aiohttp
import matplotlib.pyplot as plt
# Raw benchmark prompt(s), one per line; the surrounding blank lines are
# dropped when the list below is built.
text = """
Planning a project is one thing, but making sure deadlines are met and all tasks are completed is a whole other. Falling behind is easy when an entire sequence of events is dependent on the previous being successfully finished.
"""

# Non-empty lines of ``text``; the benchmark uses the first entry as its prompt.
sentenses = [line for line in text.split("\n") if line]
async def req(txt, gen_toks, url='http://127.0.0.1:8080/generate/'):
    """POST one generation request and return the generated completion.

    Args:
        txt: Prompt sent as the ``text`` field of the JSON payload.
        gen_toks: Value sent as ``generate_tokens_limit``.
        url: Endpoint to POST to. Defaults to the local server address the
            benchmark was written against.

    Returns:
        The value of the ``completion`` key of the JSON response.

    Raises:
        aiohttp.ClientResponseError: The server answered with a 4xx/5xx status.
        json.JSONDecodeError: The response body is not valid JSON.
        KeyError: The response JSON has no ``completion`` key.
    """
    payload = {
        "text": txt,
        "generate_tokens_limit": gen_toks,
        # Fixed values sent with every request so runs stay comparable.
        "top_p": 0.7,
        "top_k": 0,
        "temperature": 1.0,
    }
    async with aiohttp.ClientSession() as session:
        async with session.post(url, json=payload) as response:
            # Fail loudly on HTTP errors instead of trying to parse an error
            # page as a successful result.
            response.raise_for_status()
            # Read as text and decode manually so the response Content-Type
            # header is not enforced.
            body = await response.text()
    return json.loads(body)['completion']
# Number of identical requests issued concurrently for each token limit;
# the measured batch duration is divided by this value.
REPEATS_PER_LENGTH = 3
async def start():
    """Benchmark generation time as the requested output length grows.

    For every ``generate_tokens_limit`` in 100, 200, ..., 1800 the same prompt
    (``sentenses[0]``) is sent ``REPEATS_PER_LENGTH`` times concurrently. The
    elapsed time of the whole batch divided by ``REPEATS_PER_LENGTH`` is
    recorded. Both series are printed to stdout and plotted to ``res.png``.
    """
    inp = sentenses[0]
    token_limits = []
    avg_durations = []

    for gen_toks in range(100, 1900, 100):
        # perf_counter is monotonic, so clock adjustments during a long run
        # cannot distort the measured duration.
        started = time.perf_counter()
        await asyncio.gather(
            *[req(inp, gen_toks) for _ in range(REPEATS_PER_LENGTH)]
        )
        dur = time.perf_counter() - started
        token_limits.append(gen_toks)
        # The requests run concurrently, so this is batch time divided by
        # batch size, not the latency of an individual request.
        avg_durations.append(dur / REPEATS_PER_LENGTH)

    print('RES:\n',
          " ".join(str(e) for e in token_limits),
          '\n',
          " ".join(str(e) for e in avg_durations),
          )

    plt.plot(token_limits, avg_durations)
    # The x axis is the requested output length; the input prompt is fixed.
    plt.xlabel('generate_tokens_limit')
    plt.ylabel('avg time per request, s')
    plt.title('Generation time vs. generated tokens limit')
    plt.savefig('res.png', dpi=600)
# Run the benchmark only when executed as a script, so that importing this
# module does not start sending requests.
if __name__ == "__main__":
    asyncio.run(start())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment