# ftnext/rasbt_llm-instruction-eval-ollama.py

## rasbt_llm-instruction-eval-ollama.py
# /// script
# dependencies = ["openai", "tqdm"]
# ///

# based on https://github.com/rasbt/LLMs-from-scratch/blob/aba7ed2eb1fce4ebbca28eeed11ab19687cb1764/ch07/03_model-evaluation/llm-instruction-eval-ollama.ipynb

from openai import OpenAI
from tqdm import tqdm

# Evaluation samples embedded in the script. Each entry holds the task
# ("instruction" plus an optional "input"), the reference answer ("output"),
# and the two candidate answers that get scored further down
# ("model 1 response" and "model 2 response").
json_data = [
    {
        # "input" is empty here, so format_input() adds no "### Input:" section.
        "instruction": "Name 3 different animals that are active during the day.",
        "input": "",
        "output": "1. Squirrel\n2. Eagle\n3. Tiger",
        "model 1 response": "\n1. Squirrel\n2. Tiger\n3. Eagle\n4. Cobra\n5. Tiger\n6. Cobra",
        "model 2 response": "\n1. Squirrel\n2. Eagle\n3. Tiger",
    },
    {
        # Non-empty "input": format_input() appends it under "### Input:".
        "instruction": "Take the following noun and turn it into a verb.",
        "input": "Light",
        "output": "To light.",
        "model 1 response": "\nTo light.",
        "model 2 response": "\nLight is the most abundant element in the periodic table.",
    },
]


def format_input(entry: dict) -> str:
    """Build the instruction prompt for one evaluation entry.

    Args:
        entry: Mapping with a required "instruction" key and an optional
            "input" key.

    Returns:
        The fixed preamble followed by an "### Instruction:" section and,
        only when the entry has a non-empty "input", an "### Input:" section.
        The result has no trailing newline.

    Raises:
        KeyError: If "instruction" is missing from ``entry``.
    """
    instruction_text = (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request."
        "\n\n### Instruction:\n"
        f"{entry['instruction']}"
    )
    # "input" is optional: a missing key is treated like an empty value
    # instead of raising KeyError.
    input_value = entry.get("input")
    input_text = f"\n\n### Input:\n{input_value}" if input_value else ""
    return instruction_text + input_text


# Talk to a local Ollama server through its OpenAI-compatible /v1 endpoint.
# The OpenAI client insists on an api_key; "ollama" is only a placeholder
# (NOTE(review): Ollama's compatibility docs say the key is not checked).
client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")

for entry in tqdm(json_data, desc="Scoring entries"):
    input_string = format_input(entry)
    print(input_string)
    print()
    print(f"correct output: {entry['output']}")
    print()

    # Ask the judge model to grade each candidate answer for this entry.
    for json_key in ("model 1 response", "model 2 response"):
        prompt = (
            f"Given the input `{input_string}`, "
            f"and correct output `{entry['output']}`, "
            f"score the model response `{entry[json_key]}` "
            "on a scale from 0 to 100, where 100 is the best score. "
            "Respond with the integer number only."
        )

        response = client.chat.completions.create(
            model="llama3",
            # Fixed seed and zero temperature to keep the grading repeatable.
            seed=123,
            temperature=0,
            # Only a short integer is expected back.
            max_tokens=5,
            messages=[
                {"role": "user", "content": prompt},
            ],
        )

        # The message content may be None; normalise it so the failure path
        # below is a ValueError rather than an uncaught TypeError.
        raw_score = response.choices[0].message.content or ""
        try:
            score = int(raw_score.strip())
        except ValueError:
            # Report the skipped answer instead of dropping it silently.
            print(f"{json_key}: could not convert score: {raw_score!r}")
            print()
            continue

        print(f"{json_key}: {entry[json_key]}")
        print(f"score {score}/100")
        print()
    print("-" * 40)
	# /// script
	# dependencies = ["openai", "tqdm"]
	# ///

# NOTE(review): everything from here on is a second copy of the script above
# that had lost its indentation (each line was tab-prefixed and the nested
# structure was flattened). It is restored to valid Python below; consider
# deleting it, since it re-runs the same evaluation.
# The metadata comment directly above stays indented on purpose: PEP 723 only
# recognises a block that starts in column 0 and permits a single `script`
# block per file, which already exists at the top.

# based on https://github.com/rasbt/LLMs-from-scratch/blob/aba7ed2eb1fce4ebbca28eeed11ab19687cb1764/ch07/03_model-evaluation/llm-instruction-eval-ollama.ipynb

from openai import OpenAI
from tqdm import tqdm

# Evaluation samples: task ("instruction" + optional "input"), reference
# answer ("output") and the two candidate answers to be scored.
json_data = [
    {
        "instruction": "Name 3 different animals that are active during the day.",
        "input": "",
        "output": "1. Squirrel\n2. Eagle\n3. Tiger",
        "model 1 response": "\n1. Squirrel\n2. Tiger\n3. Eagle\n4. Cobra\n5. Tiger\n6. Cobra",
        "model 2 response": "\n1. Squirrel\n2. Eagle\n3. Tiger",
    },
    {
        "instruction": "Take the following noun and turn it into a verb.",
        "input": "Light",
        "output": "To light.",
        "model 1 response": "\nTo light.",
        "model 2 response": "\nLight is the most abundant element in the periodic table.",
    },
]


def format_input(entry: dict) -> str:
    """Build the instruction prompt for one evaluation entry.

    Args:
        entry: Mapping with a required "instruction" key and an optional
            "input" key.

    Returns:
        The fixed preamble followed by an "### Instruction:" section and,
        only when the entry has a non-empty "input", an "### Input:" section.
        The result has no trailing newline.

    Raises:
        KeyError: If "instruction" is missing from ``entry``.
    """
    instruction_text = (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request."
        "\n\n### Instruction:\n"
        f"{entry['instruction']}"
    )
    # "input" is optional: a missing key is treated like an empty value.
    input_value = entry.get("input")
    input_text = f"\n\n### Input:\n{input_value}" if input_value else ""
    return instruction_text + input_text


# Local Ollama server via its OpenAI-compatible /v1 endpoint; the api_key is
# a placeholder required by the client constructor.
client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")

for entry in tqdm(json_data, desc="Scoring entries"):
    input_string = format_input(entry)
    print(input_string)
    print()
    print(f"correct output: {entry['output']}")
    print()

    # Ask the judge model to grade each candidate answer for this entry.
    for json_key in ("model 1 response", "model 2 response"):
        prompt = (
            f"Given the input `{input_string}`, "
            f"and correct output `{entry['output']}`, "
            f"score the model response `{entry[json_key]}` "
            "on a scale from 0 to 100, where 100 is the best score. "
            "Respond with the integer number only."
        )

        response = client.chat.completions.create(
            model="llama3",
            # Fixed seed and zero temperature to keep the grading repeatable.
            seed=123,
            temperature=0,
            # Only a short integer is expected back.
            max_tokens=5,
            messages=[
                {"role": "user", "content": prompt},
            ],
        )

        # The message content may be None; normalise it so the failure path
        # below is a ValueError rather than an uncaught TypeError.
        raw_score = response.choices[0].message.content or ""
        try:
            score = int(raw_score.strip())
        except ValueError:
            # Report the skipped answer instead of dropping it silently.
            print(f"{json_key}: could not convert score: {raw_score!r}")
            print()
            continue

        print(f"{json_key}: {entry[json_key]}")
        print(f"score {score}/100")
        print()
    print("-" * 40)