DSPy prompt evaluation with metric using llama.cpp
# A gist for using a model served by `llama.cpp` with the `dspy` library.
#
# DSPy features used in this gist
# - `dspy.Predict`
# - `dspy.Signature`
# - `dspy.context`
# The script first prompts the model to answer an example question, then assesses the correctness and engagingness of the answer.
#
# Install `llama.cpp` via Homebrew; it ships with a built-in OpenAI-compatible server (`llama-server`).
# brew install ggerganov/ggerganov/llama.cpp
# llama-server --hf-repo TheBloke/Mistral-7B-Instruct-v0.2-GGUF --model mistral-7b-instruct-v0.2.Q4_K_M.gguf --hf-file mistral-7b-instruct-v0.2.Q4_K_M.gguf
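#
# Before running the script, you can optionally confirm the server is reachable
# (llama-server exposes a /health endpoint):
# curl http://localhost:8080/health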
import dspy
# Optional for displaying the results on stdout as tables
from rich import print
from rich.table import Table
# The example question-answer pair; we already know the answer is `yes` and want to assess
# the correctness and engagingness of the model's answer
example = dspy.Example(
    question="Are both Nehi and Nectar d.o.o. part of the beverage industry?",
    answer="yes",
)
# The `llama.cpp` model
llama_cpp_model = dspy.OpenAI(
    # assume llama-server is running on localhost:8080
    api_base="http://localhost:8080/v1/",
    # placeholder; the client raises an error if no key is set
    api_key="none",
    # for some reason, an error is raised if set to `text` (llama-server issue?)
    model_type="chat",
    # stop word for mistral-7b-instruct-v0.2
    stop="\n\n",
    # max number of tokens to generate
    max_tokens=250,
)
dspy.settings.configure(lm=llama_cpp_model)
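# Optional smoke test of the DSPy <-> llama-server wiring: DSPy LM clients are
# callable and return a list of completion strings (uncomment to try):
# print(llama_cpp_model("Answer with one word: hello")[0])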
# A dspy signature for automatic assessment of a question-answer pair
class Assess(dspy.Signature):
    """Assess the quality of an answer to a question."""

    assessed_text = dspy.InputField()
    assessment_question = dspy.InputField()
    assessment_answer = dspy.OutputField(desc="Yes or No")

# the predict module built from the assessment signature,
# used in the correct_engaging_metric function below
assess_pred = dspy.Predict(Assess)
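# For illustration, the assessment module can also be invoked directly on arbitrary
# text (a hypothetical example, not part of the evaluation flow; uncomment to try):
# demo = assess_pred(
#     assessed_text="Yes, both companies are in the beverage industry.",
#     assessment_question="Is the assessed text self-contained and informative?",
# )
# print(demo.assessment_answer)  # expected to contain "Yes" or "No"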
# a metric returning a score between 0 and 1 for the correctness and engagingness of the answer
def correct_engaging_metric(gold, pred, trace=None):
    question, answer, gen_answer = gold.question, gold.answer, pred.answer
    engaging = "Is the assessed text self-contained and informative?"
    correct = f"The text should answer `{question}` with `{answer}`. Does the assessed text contain this answer?"
    with dspy.context(lm=llama_cpp_model):
        correct = assess_pred(assessed_text=gen_answer, assessment_question=correct)
        engaging = assess_pred(assessed_text=gen_answer, assessment_question=engaging)
    correct, engaging = [
        "yes" in m.assessment_answer.lower() for m in [correct, engaging]
    ]
    score = correct + engaging
    if trace is not None:
        return score >= 2
    return score / 2.0
# A predict module accepts a signature (either a string or a `dspy.Signature` class).
# The following are example signature strings:
# question -> answer
# sentence -> sentiment
# document -> summary
# text -> gist
# long_context -> tldr
# context, question -> answer
# question, choices -> reasoning, selection
#
# example:
# predict_module = dspy.Predict('document -> summary')
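# The string form is shorthand; the same "question -> answer" signature can be
# written as a class when field descriptions are needed (a sketch mirroring the
# `Assess` class above):
#
# class QA(dspy.Signature):
#     """Answer the question with a short factual answer."""
#
#     question = dspy.InputField()
#     answer = dspy.OutputField(desc="a short factual answer")
#
# qa_predict_module = dspy.Predict(QA)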
# a predict module for answering questions
qa_predict_module = dspy.Predict("question -> answer")
# prompt the llm to answer the question
output = qa_predict_module(question=example.question)
score = correct_engaging_metric(example, output)
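# The metric above can also drive DSPy's batch evaluator over a whole devset
# (a minimal sketch, assuming examples with `question` marked as the input field;
# uncomment to run):
# from dspy.evaluate import Evaluate
# devset = [example.with_inputs("question")]
# evaluator = Evaluate(devset=devset, metric=correct_engaging_metric, display_progress=True)
# evaluator(qa_predict_module)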
table = Table(title="Metrics")
table.add_column("Question")
table.add_column("Expected Answer")
table.add_column("Generated Answer")
table.add_column("Score (0..1)", style="green")
table.add_row(example.question, example.answer, output.answer, str(score))
print(table)
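# Show the exact prompts DSPy sent to llama-server for the QA call and the two
# assessment calls; handy for debugging prompt formatting (DSPy LM clients keep
# a call history)
llama_cpp_model.inspect_history(n=3)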