Skip to content

Instantly share code, notes, and snippets.

@morganmcg1
Created January 23, 2024 16:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save morganmcg1/40d3a801613921371c02338efa70c3dd to your computer and use it in GitHub Desktop.
Non-determinism in GPT-3.5, GPT-4 and Mixtral
import os
import json
import tqdm
import wandb
from openai import OpenAI
from time import sleep
from pathlib import Path
from dotenv import load_dotenv
load_dotenv()
# Models under test.
# NOTE: the original list contained "gpt-4-1106-preview" twice, which would
# have run (and billed) the same model's C completions twice — deduplicated.
chat_models = ["gpt-4-0613", "gpt-4-1106-preview", "gpt-3.5-turbo"]
together_models = ["mistralai/Mixtral-8x7B-Instruct-v0.1"]
# together_models = []
chat_models.extend(together_models)

# Shared chat prompt: we want maximum output variance at temperature 0,
# so the prompt explicitly asks for a "randomized" story.
message_history = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Write a unique, surprising, extremely randomized story with highly unpredictable changes of events."}
]

# Legacy completion-API models (only used by the commented-out section below).
completion_models = ["text-davinci-003", "text-davinci-001", "davinci-instruct-beta", "davinci"]
# Plain-text rendering of the same prompt for the completions endpoint.
prompt = "[System: You are a helpful assistant]\n\nUser: Write a unique, surprising, extremely randomized story with highly unpredictable changes of events.\n\nAI:"

# Collects (num_unique_sequences, model_name) tuples for the final summary table.
results = []
import time


class TimeIt:
    """Context manager that prints how long its ``with`` body took.

    Usage::

        with TimeIt("label") as t:
            ...
    """

    def __init__(self, name):
        self.name = name  # label printed alongside the elapsed time

    def __enter__(self):
        # perf_counter() is monotonic, so the measured duration cannot go
        # negative or jump if the system clock is adjusted mid-run
        # (time.time() offers no such guarantee).
        self.start = time.perf_counter()
        # Fix: the original returned None, so `with TimeIt(...) as t:` bound
        # nothing useful. Returning self is backward-compatible.
        return self

    def __exit__(self, *args):
        # Prints even when the body raised; the exception still propagates
        # because we implicitly return None (falsy).
        print(f"{self.name} took {time.perf_counter() - self.start} seconds")
# Experiment parameters.
C = 30   # completions requested per model
N = 128  # max_tokens per completion

# Record the run configuration in Weights & Biases and prepare a table
# that will hold every (model, generated sequence) pair.
run_config = {"n_completions": C, "max_tokens": N}
wandb.init(project="moe-non-determinism", entity="morgan", config=run_config)
tbl = wandb.Table(columns=["model", "sequence"])
# Testing chat models
for model in chat_models:
    # Together-hosted and OpenAI-hosted models need different credentials,
    # endpoints, and logit_bias handling.
    if model in together_models:
        api_key = os.environ.get("TOGETHER_API_KEY")
        base_url = "https://api.together.xyz/"
        logit_bias = None
    else:
        api_key = os.environ.get("OPENAI_API_KEY")
        base_url = None
        logit_bias = {"100257": -100.0}
    client = OpenAI(api_key=api_key, base_url=base_url)

    sequences = set()  # distinct completions observed for this model
    errors = 0  # although I track errors, at no point were any errors ever emitted
    with TimeIt(model):
        for _ in range(C):
            try:
                completion = client.chat.completions.create(
                    model=model,
                    messages=message_history,
                    max_tokens=N,
                    temperature=0,
                    logit_bias=logit_bias,  # this doesn't really do anything, because chat models don't do <|endoftext|> much
                )
                # print(completion)
                text = completion.choices[0].message.content
                sequences.add(text)
                tbl.add_data(model, text)
                sleep(5)  # cheaply avoid rate limiting
            except Exception as e:
                print('something went wrong for', model, e)
                errors += 1

    # Per-model summary: unique-sequence count is the non-determinism signal.
    print(f"\nModel {model} created {len(sequences)} ({errors=}) unique sequences:")
    wandb.summary[f"{model}_unique_sequences"] = len(sequences)
    wandb.summary[f"{model}_errors"] = errors
    print(json.dumps(list(sequences)))
    results.append((len(sequences), model))
# Testing completion models
# for model in completion_models:
# sequences = set()
# errors = 0
# with TimeIt(model):
# for _ in range(C):
# try:
# completion = openai.Completion.create(
# model=model,
# prompt=prompt,
# max_tokens=N,
# temperature=0,
# logit_bias = {"50256": -100.0}, # prevent EOS
# )
# sequences.add(completion.choices[0].text)
# sleep(1)
# except Exception as e:
# print('something went wrong for', model, e)
# errors += 1
# print(f"\nModel {model} created {len(sequences)} ({errors=}) unique sequences:")
# print(json.dumps(list(sequences)))
# results.append((len(sequences), model))
# Printing table of results
wandb.log({"results": tbl})
print("\nTable of Results:")
print("Num_Sequences\tModel_Name")
# One row per model: how many distinct sequences temperature-0 produced.
for count, name in results:
    print(f"{count}\t{name}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment