Skip to content

Instantly share code, notes, and snippets.

@goofansu
Created January 17, 2025 07:26
Show Gist options
  • Select an option

  • Save goofansu/da6f1a7e07a3099021d93b60367411fa to your computer and use it in GitHub Desktop.

Select an option

Save goofansu/da6f1a7e07a3099021d93b60367411fa to your computer and use it in GitHub Desktop.
import re
import phoenix as px
import pandas as pd
from phoenix.evals import llm_generate, LiteLLMModel
from phoenix.trace import SpanEvaluations
# Prompt sent to the model for every row. The ``{context}`` placeholder is
# filled from the dataframe column of the same name, and the model is asked to
# reply in the form "the score is: N" so the reply can be parsed with a regex.
# NOTE(review): the rubric maps 1 to "no errors" but 2 to 20% errors, so the
# low end of the scale is not strictly proportional - confirm this is intended.
SCORE_TEMPLATE = """You are a helpful AI bot that checks for grammatical, spelling and typing errors
in a document context. You are going to return a continous score for the
document based on the percent of grammatical and typing errors. The score should be
between 10 and 1. A score of 1 will be no grammatical errors in any word,
a score of 2 will be 20% of words have errors, a 5 score will be 50% errors,
a score of 7 is 70%, and a 10 score will be all words in the context have a
grammatical errors.
The following is the document context.
#CONTEXT
{context}
#ENDCONTEXT
#QUESTION
Please return a score between 10 and 1.
You will return no other text or language besides the score. Only return the score.
Please return in a format that is "the score is: 10" or "the score is: 1"
"""

# Single-row input frame; the "context" column supplies the template variable.
data = pd.DataFrame({"context": ["Hello, wod."]})
def find_score(output):
    """Extract the numeric score that follows the phrase "score is".

    The search is case-insensitive. After "score is" any characters on the
    same line are skipped lazily until the first number is found; the number
    may be an integer or a float, with an optional sign and exponent.

    Args:
        output: Text produced by the model.

    Returns:
        The matched number as a ``float``, or ``None`` when the text contains
        no "score is <number>" phrase.
    """
    score_regex = r"score is.*?([+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?)"
    found = re.search(score_regex, output, re.IGNORECASE)
    # Group 1 is the whole number, including sign, fraction and exponent.
    return float(found.group(1)) if found else None
def numeric_score_eval(output, row_index):
    """Parse one model response into a ``{"score": ...}`` record.

    This function is handed to ``llm_generate`` as its ``output_parser``.

    Args:
        output: Raw text of the model response.
        row_index: Index of the row being processed; not used here.

    Returns:
        A dict with the single key ``"score"`` holding the parsed float, or
        ``None`` when no score could be found in ``output``.
    """
    return {"score": find_score(output)}
# Model used to grade each row, routed through LiteLLM.
grader_model = LiteLLMModel(model="openrouter/openai/gpt-4o-mini")

# Run the prompt over every row of ``data``; each response is parsed by
# ``numeric_score_eval``, and the prompt and raw response are kept as columns.
test_results = llm_generate(
    dataframe=data,
    template=SCORE_TEMPLATE,
    model=grader_model,
    output_parser=numeric_score_eval,
    include_prompt=True,
    include_response=True,
    verbose=True,
)

# Name the index "context.span_id" when it is not already called that.
# NOTE(review): presumably log_evaluations matches rows to spans through this
# index name - confirm against the Phoenix documentation.
if "context.span_id" not in test_results.index.names:
    test_results.index.name = "context.span_id"

print(test_results)

# Upload the scored rows to the Phoenix server as span-level evaluations.
span_evals = SpanEvaluations(eval_name="Relevance", dataframe=test_results)
px.Client().log_evaluations(span_evals)
@goofansu
Copy link
Author

llm_generate |██████████████████████████████████████████████████████████████████████| 1/1 (100.0%) | ⏳ 00:02<00:00 |  2.29s/it
                 score                                             prompt         response
context.span_id
0                  9.0  You are a helpful AI bot that checks for gramm...  the score is: 9

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment