openplayground-metrics-cron

A cron-style metrics script: it sends one fixed prompt to every configured model on dev.nat.dev (openplayground), consumes the server-sent event stream, and records each model's time to first byte, total completion time, and average tokens per second in Redis.
import json
import logging
import time
from pprint import pformat

import redis
import requests
import sseclient

from models_schema import models

logging.basicConfig(filename='metrics_cron.log', level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Session cookies for dev.nat.dev; fill these in before running.
cookies = {
    "_client_uat": "",
    "__session": ""
}

prompt = "Generate a Django application with Authentication, JWT, Tests, DB support. Show docker-compose for python and postgres. Show the complete code for every file!"

r = redis.Redis(host='localhost', port=6379, decode_responses=True)

try:
    with requests.post(
        "https://dev.nat.dev/api/inference/text",
        headers={"Content-Type": "application/json"},
        cookies=cookies,
        data=json.dumps({"prompt": prompt, "models": models, "stream": True}),
        stream=True,
    ) as response:
        if response.status_code != 200:
            raise Exception(
                f"Request failed: {response.status_code} {response.reason}")

        MODEL_TRACKER = {}
        start_time = time.time()

        for packet in sseclient.SSEClient(response).events():
            event_object = {'type': packet.event, 'data': packet.data}
            try:
                data = json.loads(event_object['data'])
            except json.JSONDecodeError:
                # Skip packets whose payload is not valid JSON.
                logging.exception("JSONDecodeError:")
                logging.info(pformat(event_object))
                continue
            current_model = data['modelName']
            model_provider = data['modelProvider']

            if event_object['type'] == "completion":
                if current_model in MODEL_TRACKER:
                    MODEL_TRACKER[current_model]['answer_tokens'].append(
                        data['token'])
                else:
                    # First token from this model: record time to first byte.
                    MODEL_TRACKER[current_model] = {
                        'first_byte_timestamp': time.time(),
                        'answer_tokens': [data['token']],
                        'provider': model_provider,
                    }
            elif event_object['type'] == "status":
                try:
                    if data['token'] == '[COMPLETED]':
                        MODEL_TRACKER[current_model]['answer_completed_timestamp'] = time.time()
                    if "[ERROR]" in data['token']:
                        MODEL_TRACKER[current_model] = {
                            'error': data['token'],
                            'provider': model_provider,
                            'answer_completed_timestamp': 0,
                        }
                except Exception:
                    logging.exception("Unknown Exception:")
                    logging.info(pformat(event_object))

        for model_key, model_content in MODEL_TRACKER.items():
            logging.info(f"CALCULATIONS STEP: {model_key} {model_content}")
            # If the model was flagged with an error above, fill in empty data.
            if "error" in MODEL_TRACKER[model_key]:
                MODEL_TRACKER[model_key]['start_time'] = start_time
                MODEL_TRACKER[model_key]['model'] = model_key
                MODEL_TRACKER[model_key]['completion_response_time'] = 0
                MODEL_TRACKER[model_key]['average_token_second'] = 0
            else:
                try:
                    if "answer_tokens" in MODEL_TRACKER[model_key]:
                        # Some completions don't fail but come back empty anyway;
                        # treat those as errors too.
                        if len(MODEL_TRACKER[model_key]['answer_tokens']) == 0:
                            MODEL_TRACKER[model_key]['error'] = 'Timeout or Empty Token Response'
                            MODEL_TRACKER[model_key]['start_time'] = start_time
                            MODEL_TRACKER[model_key]['model'] = model_key
                            MODEL_TRACKER[model_key]['completion_response_time'] = 0
                            MODEL_TRACKER[model_key]['average_token_second'] = 0
                        # After discarding all "false positives", do the calculations.
                        else:
                            elapsed_time = model_content['answer_completed_timestamp'] - start_time
                            logging.info(
                                f"Model {model_key} completed in {elapsed_time} seconds")
                            MODEL_TRACKER[model_key]['start_time'] = start_time
                            MODEL_TRACKER[model_key]['model'] = model_key
                            MODEL_TRACKER[model_key]['completion_response_time'] = elapsed_time
                            MODEL_TRACKER[model_key]['average_token_second'] = (
                                len(model_content['answer_tokens']) / elapsed_time)
                            MODEL_TRACKER[model_key]['first_byte_elapsed_time'] = (
                                model_content['first_byte_timestamp'] - start_time)
                            # MODEL_TRACKER is written to Redis as-is, so drop
                            # the raw per-token fields first.
                            del MODEL_TRACKER[model_key]['answer_tokens']
                            del MODEL_TRACKER[model_key]['answer_completed_timestamp']
                            del MODEL_TRACKER[model_key]['first_byte_timestamp']
                except KeyError as e:
                    if MODEL_TRACKER[model_key]:
                        logging.error(
                            f"KeyError: {e} in MODEL_TRACKER[{model_key}]")
                    else:
                        logging.error(f"KeyError: {e} in MODEL_TRACKER")
                    continue

        for model_key, model_content in MODEL_TRACKER.items():
            try:
                # One sorted set per model: the member is the metrics dict,
                # the score is the run's start timestamp.
                r.zadd(model_key, {str(model_content): int(start_time)})
                logging.info(
                    f"REDIS SUCCESSFUL WRITE {model_key} {str(model_content)} {int(start_time)}")
            except Exception as e:
                logging.error(
                    f"REDIS WRITING STEP ERROR: {e} {model_key} {model_content}")
                continue
except Exception:
    logging.exception("An exception occurred")
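Each run ends up as one member of a per-model Redis sorted set, scored by the run's start timestamp, so a dashboard or a later job can read the history back in time order. A minimal read-back sketch, assuming the same local Redis instance and using "openai:gpt-4" purely as an example key; note the members were written with str() on a Python dict, so they parse with ast.literal_eval rather than json.loads:

import ast

import redis

r = redis.Redis(host='localhost', port=6379, decode_responses=True)

# Oldest to newest run for one model; the score is the run's start timestamp.
for member, score in r.zrange("openai:gpt-4", 0, -1, withscores=True):
    metrics = ast.literal_eval(member)  # the dict written by the cron script
    print(int(score), metrics.get('completion_response_time'),
          metrics.get('average_token_second'))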
models_schema.py

models = [
    {
        "name": "openai:text-ada-001",
        "tag": "openai:text-ada-001",
        "capabilities": ["logprobs", "completion"],
        "provider": "openai",
        "parameters": {
            "temperature": 0,
            "contextLength": 1792,
            "maximumLength": 525,
            "topP": 1,
            "presencePenalty": 0,
            "frequencyPenalty": 0,
            "stopSequences": [],
            "numberOfSamples": 1,
        },
        "enabled": True,
        "selected": False,
    },
    {
        "name": "openai:text-babbage-001",
        "tag": "openai:text-babbage-001",
        "capabilities": ["logprobs", "completion"],
        "provider": "openai",
        "parameters": {
            "temperature": 0,
            "contextLength": 1792,
            "maximumLength": 525,
            "topP": 1,
            "presencePenalty": 0,
            "frequencyPenalty": 0,
            "stopSequences": [],
            "numberOfSamples": 1,
        },
        "enabled": True,
        "selected": True,
    },
    {
        "name": "openai:text-curie-001",
        "tag": "openai:text-curie-001",
        "capabilities": ["logprobs", "completion"],
        "provider": "openai",
        "parameters": {
            "temperature": 0,
            "contextLength": 1792,
            "maximumLength": 525,
            "topP": 1,
            "presencePenalty": 0,
            "frequencyPenalty": 0,
            "stopSequences": [],
            "numberOfSamples": 1,
        },
        "enabled": True,
        "selected": False,
    },
    {
        "name": "openai:text-davinci-003",
        "tag": "openai:text-davinci-003",
        "capabilities": ["logprobs", "completion"],
        "provider": "openai",
        "parameters": {
            "temperature": 0,
            "contextLength": 3840,
            "maximumLength": 525,
            "topP": 1,
            "presencePenalty": 0,
            "frequencyPenalty": 0,
            "stopSequences": [],
            "numberOfSamples": 1,
        },
        "enabled": True,
        "selected": False,
    },
    {
        "name": "openai:gpt-3.5-turbo",
        "tag": "openai:gpt-3.5-turbo",
        "capabilities": ["chat"],
        "provider": "openai",
        "parameters": {
            "temperature": 0,
            "contextLength": 3840,
            "maximumLength": 525,
            "topP": 1,
            "presencePenalty": 0,
            "frequencyPenalty": 0,
            "stopSequences": [],
            "numberOfSamples": 1,
        },
        "enabled": True,
        "selected": False,
    },
    {
        "name": "openai:gpt-4",
        "tag": "openai:gpt-4",
        "capabilities": ["chat"],
        "provider": "openai",
        "parameters": {
            "temperature": 0,
            "contextLength": 7935,
            "maximumLength": 525,
            "topP": 1,
            "presencePenalty": 0,
            "frequencyPenalty": 0,
            "stopSequences": [],
            "numberOfSamples": 1,
        },
        "enabled": True,
        "selected": False,
    },
    {
        "name": "anthropic:claude-instant-v1",
        "tag": "anthropic:claude-instant-v1",
        "capabilities": ["logprobs", "chat", "completion"],
        "provider": "anthropic",
        "parameters": {
            "temperature": 0,
            "contextLength": 8960,
            "maximumLength": 525,
            "topP": 1,
            "topK": 1,
            "presencePenalty": 1,
            "frequencyPenalty": 1,
            "stopSequences": [],
            "numberOfSamples": 1,
        },
        "enabled": True,
        "selected": False,
    },
    {
        "name": "anthropic:claude-v1",
        "tag": "anthropic:claude-v1",
        "capabilities": ["logprobs", "chat", "completion"],
        "provider": "anthropic",
        "parameters": {
            "temperature": 0,
            "contextLength": 8960,
            "maximumLength": 525,
            "topP": 1,
            "topK": 1,
            "presencePenalty": 1,
            "frequencyPenalty": 1,
            "stopSequences": [],
            "numberOfSamples": 1,
        },
        "enabled": True,
        "selected": False,
    },
    {
        "name": "anthropic:claude-v1-100k",
        "tag": "anthropic:claude-v1-100k",
        "capabilities": ["logprobs", "chat", "completion"],
        "provider": "anthropic",
        "parameters": {
            "temperature": 0,
            "contextLength": 8960,
            "maximumLength": 525,
            "topP": 1,
            "topK": 1,
            "presencePenalty": 1,
            "frequencyPenalty": 1,
            "stopSequences": [],
            "numberOfSamples": 1,
        },
        "enabled": True,
        "selected": False,
    },
    {
        "name": "anthropic:claude-instant-v1-100k",
        "tag": "anthropic:claude-instant-v1-100k",
        "capabilities": ["logprobs", "chat", "completion"],
        "provider": "anthropic",
        "parameters": {
            "temperature": 0,
            "contextLength": 8960,
            "maximumLength": 525,
            "topP": 1,
            "topK": 1,
            "presencePenalty": 1,
            "frequencyPenalty": 1,
            "stopSequences": [],
            "numberOfSamples": 1,
        },
        "enabled": True,
        "selected": False,
    },
    {
        "name": "aleph-alpha:luminous-supreme-control",
        "tag": "aleph-alpha:luminous-supreme-control",
        "capabilities": ["logprobs", "completion"],
        "provider": "aleph-alpha",
        "parameters": {
            "temperature": 0,
            "contextLength": 1792,
            "maximumLength": 525,
            "topP": 1,
            "topK": 1,
            "repetitionPenalty": 1,
            "stopSequences": [],
            "numberOfSamples": 1,
        },
        "enabled": True,
        "selected": False,
    },
    {
        "name": "aleph-alpha:luminous-base",
        "tag": "aleph-alpha:luminous-base",
        "capabilities": ["logprobs", "completion"],
        "provider": "aleph-alpha",
        "parameters": {
            "temperature": 0,
            "contextLength": 1792,
            "maximumLength": 525,
            "topP": 1,
            "topK": 1,
            "repetitionPenalty": 1,
            "stopSequences": [],
            "numberOfSamples": 1,
        },
        "enabled": True,
        "selected": False,
    },
    {
        "name": "aleph-alpha:luminous-supreme",
        "tag": "aleph-alpha:luminous-supreme",
        "capabilities": ["logprobs", "completion"],
        "provider": "aleph-alpha",
        "parameters": {
            "temperature": 0,
            "contextLength": 1792,
            "maximumLength": 525,
            "topP": 1,
            "topK": 1,
            "repetitionPenalty": 1,
            "stopSequences": [],
            "numberOfSamples": 1,
        },
        "enabled": True,
        "selected": False,
    },
    {
        "name": "aleph-alpha:luminous-extended",
        "tag": "aleph-alpha:luminous-extended",
        "capabilities": ["logprobs", "completion"],
        "provider": "aleph-alpha",
        "parameters": {
            "temperature": 0,
            "contextLength": 1792,
            "maximumLength": 525,
            "topP": 1,
            "topK": 1,
            "repetitionPenalty": 1,
            "stopSequences": [],
            "numberOfSamples": 1,
        },
        "enabled": True,
        "selected": False,
    },
    {
        "name": "forefront:gpt-j-6b-vanilla",
        "tag": "forefront:gpt-j-6b-vanilla",
        "capabilities": ["logprobs", "completion"],
        "provider": "forefront",
        "parameters": {
            "temperature": 0,
            "contextLength": 1792,
            "maximumLength": 525,
            "topP": 1,
            "topK": 1,
            "presencePenalty": 0,
            "frequencyPenalty": 0,
            "repetitionPenalty": 1,
            "stopSequences": [],
        },
        "enabled": True,
        "selected": False,
    },
    {
        "name": "forefront:pythia-6.9b",
        "tag": "forefront:pythia-6.9b",
        "capabilities": ["logprobs", "completion"],
        "provider": "forefront",
        "parameters": {
            "temperature": 0,
            "contextLength": 1792,
            "maximumLength": 525,
            "topP": 1,
            "topK": 1,
            "presencePenalty": 0,
            "frequencyPenalty": 0,
            "repetitionPenalty": 1,
            "stopSequences": [],
        },
        "enabled": True,
        "selected": False,
    },
    {
        "name": "forefront:pythia-12b",
        "tag": "forefront:pythia-12b",
        "capabilities": ["logprobs", "completion"],
        "provider": "forefront",
        "parameters": {
            "temperature": 0,
            "contextLength": 1792,
            "maximumLength": 525,
            "topP": 1,
            "topK": 1,
            "presencePenalty": 0,
            "frequencyPenalty": 0,
            "repetitionPenalty": 1,
            "stopSequences": [],
        },
        "enabled": True,
        "selected": False,
    },
    {
        "name": "forefront:gpt-neox-20b-vanilla",
        "tag": "forefront:gpt-neox-20b-vanilla",
        "capabilities": ["logprobs", "completion"],
        "provider": "forefront",
        "parameters": {
            "temperature": 0,
            "contextLength": 1792,
            "maximumLength": 525,
            "topP": 1,
            "topK": 1,
            "presencePenalty": 0,
            "frequencyPenalty": 0,
            "repetitionPenalty": 1,
            "stopSequences": [],
        },
        "enabled": True,
        "selected": False,
    },
    {
        "name": "forefront:pythia-20b",
        "tag": "forefront:pythia-20b",
        "capabilities": ["logprobs", "completion"],
        "provider": "forefront",
        "parameters": {
            "temperature": 0,
            "contextLength": 1792,
            "maximumLength": 525,
            "topP": 1,
            "topK": 1,
            "presencePenalty": 0,
            "frequencyPenalty": 0,
            "repetitionPenalty": 1,
            "stopSequences": [],
        },
        "enabled": True,
        "selected": False,
    },
]
requirements.txt

hiredis==2.2.3
redis==4.5.5
sseclient-py==1.7.2
requests==2.31.0
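To run this on a schedule, as the gist's name suggests, one option is a crontab entry; the interval and paths below are assumptions for illustration, not part of the gist:

# Runs every 30 minutes; the directory and script name are hypothetical.
*/30 * * * * cd /opt/openplayground-metrics && /usr/bin/python3 metrics_cron.py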