Skip to content

Instantly share code, notes, and snippets.

@ftnext
Created June 16, 2024 16:12
Show Gist options
  • Save ftnext/ac42ed33019cf4856a7a3437f91a2dfc to your computer and use it in GitHub Desktop.
Save ftnext/ac42ed33019cf4856a7a3437f91a2dfc to your computer and use it in GitHub Desktop.
# /// script
# dependencies = ["openai", "tqdm"]
# ///
# based on https://github.com/rasbt/LLMs-from-scratch/blob/aba7ed2eb1fce4ebbca28eeed11ab19687cb1764/ch07/03_model-evaluation/llm-instruction-eval-ollama.ipynb
from openai import OpenAI
from tqdm import tqdm
# Sample evaluation records: each one holds the task ("instruction" plus an
# optional "input"), the reference answer ("output") and the two candidate
# answers that get scored against it.
json_data = [
    {
        "instruction": "Name 3 different animals that are active during the day.",
        "input": "",
        "output": "1. Squirrel\n2. Eagle\n3. Tiger",
        "model 1 response": "\n1. Squirrel\n2. Tiger\n3. Eagle\n4. Cobra\n5. Tiger\n6. Cobra",
        "model 2 response": "\n1. Squirrel\n2. Eagle\n3. Tiger",
    },
    {
        "instruction": "Take the following noun and turn it into a verb.",
        "input": "Light",
        "output": "To light.",
        "model 1 response": "\nTo light.",
        "model 2 response": "\nLight is the most abundant element in the periodic table.",
    },
]
def format_input(entry):
    """Build the instruction prompt text for one dataset entry.

    Args:
        entry: Mapping with an "instruction" string and, optionally, an
            "input" string.

    Returns:
        The fixed preamble followed by the instruction, plus a trailing
        "### Input:" section when the entry has a non-empty input.

    Raises:
        KeyError: If ``entry`` has no "instruction" key.
    """
    # NOTE(review): only a single newline precedes "### Instruction:" while
    # "### Input:" is preceded by a blank line -- confirm the asymmetry is
    # intended before changing the prompt text.
    instruction_text = (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request."
        f"\n### Instruction:\n{entry['instruction']}"
    )
    # .get() so that entries omitting "input" altogether behave like entries
    # whose input is the empty string.
    extra_input = entry.get("input")
    input_text = f"\n\n### Input:\n{extra_input}" if extra_input else ""
    return instruction_text + input_text
def parse_score(text):
    """Convert a scoring reply into an integer.

    Args:
        text: Raw message content of the reply; may be ``None`` when the
            reply carries no text.

    Returns:
        The parsed integer, or ``None`` when ``text`` is missing or is not a
        bare integer (``int`` itself tolerates surrounding whitespace).
    """
    if text is None:
        # int(None) would raise TypeError rather than ValueError.
        return None
    try:
        return int(text)
    except ValueError:
        return None


def main():
    """Ask the "llama3" model to score every model response in ``json_data``.

    For each entry the formatted input and the reference output are printed,
    then each candidate response is sent for scoring and the parsed score is
    printed. Replies that cannot be parsed as an integer are reported and
    skipped instead of being dropped silently.
    """
    # Uses the OpenAI client against http://localhost:11434/v1 -- presumably
    # a local Ollama server, going by the api_key value.
    client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")

    for entry in tqdm(json_data, desc="Scoring entries"):
        input_string = format_input(entry)
        print(input_string)
        print()
        print(f"correct output: {entry['output']}")
        print()

        for json_key in ("model 1 response", "model 2 response"):
            prompt = (
                f"Given the input `{input_string}`, "
                f"and correct output `{entry['output']}`, "
                f"score the model response `{entry[json_key]}` "
                "on a scale from 0 to 100, where 100 is the best score. "
                "Respond with the integer number only."
            )
            response = client.chat.completions.create(
                model="llama3",
                seed=123,
                temperature=0,
                max_tokens=5,
                messages=[
                    {"role": "user", "content": prompt},
                ],
            )
            reply = response.choices[0].message.content
            score = parse_score(reply)
            if score is None:
                # Make the skip visible so a missing score is not mistaken
                # for a response that was never evaluated.
                print(f"{json_key}: could not parse a score from {reply!r}")
                print()
                continue

            print(f"{json_key}: {entry[json_key]}")
            print(f"score {score}/100")
            print()

        print("-" * 40)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment