-
-
Save goofansu/da6f1a7e07a3099021d93b60367411fa to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import re | |
| import phoenix as px | |
| import pandas as pd | |
| from phoenix.evals import llm_generate, LiteLLMModel | |
| from phoenix.trace import SpanEvaluations | |
# Prompt sent to the LLM judge. It asks for a 1-10 score reflecting the share
# of words with grammatical/typing errors, answered as "the score is: <n>".
# `{context}` is the only placeholder; presumably it is filled from the
# `context` column of the dataframe passed alongside this template.
SCORE_TEMPLATE = """You are a helpful AI bot that checks for grammatical, spelling and typing errors
in a document context. You are going to return a continous score for the
document based on the percent of grammatical and typing errors. The score should be
between 10 and 1. A score of 1 will be no grammatical errors in any word,
a score of 2 will be 20% of words have errors, a 5 score will be 50% errors,
a score of 7 is 70%, and a 10 score will be all words in the context have a
grammatical errors.
The following is the document context.
#CONTEXT
{context}
#ENDCONTEXT
#QUESTION
Please return a score between 10 and 1.
You will return no other text or language besides the score. Only return the score.
Please return in a format that is "the score is: 10" or "the score is: 1"
"""

# One-row sample input: a single `context` column holding a sentence with a
# deliberate misspelling for the judge to score.
data = pd.DataFrame({"context": ["Hello, wod."]})
def find_score(output):
    """Extract the number that follows the phrase "score is" in *output*.

    The phrase is matched case-insensitively. Any run of non-newline
    characters may sit between the phrase and the number, and the number may
    be an integer or a float, optionally signed and optionally carrying an
    exponent (e.g. ``10``, ``-3``, ``7.5``, ``.5``, ``1e2``).

    Args:
        output: Text to search, e.g. ``"the score is: 10"``.

    Returns:
        The matched number as a ``float``, or ``None`` when the phrase is not
        followed by a number.
    """
    score_regex = re.compile(
        r"score is.*?([+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?)",
        re.IGNORECASE,
    )
    found = score_regex.search(output)
    if found is None:
        return None
    # Group 1 is the whole numeric literal (sign, digits, fraction, exponent).
    return float(found.group(1))
def numeric_score_eval(output, row_index):
    """Turn one LLM response into a ``{"score": ...}`` record.

    Intended as the per-row output parser for the dataframe being evaluated.

    Args:
        output: Raw response text to parse.
        row_index: Position of the row being parsed; accepted to fit the
            parser call signature but not used here.

    Returns:
        A dict with a single ``"score"`` key holding whatever ``find_score``
        returns for *output* (a float, or ``None`` when no score is found).
    """
    return {"score": find_score(output)}
# Run the scoring prompt over every row of `data`, parsing each response with
# `numeric_score_eval` and keeping the prompt and raw response in the result.
judge_model = LiteLLMModel(model="openrouter/openai/gpt-4o-mini")
test_results = llm_generate(
    dataframe=data,
    template=SCORE_TEMPLATE,
    model=judge_model,
    verbose=True,
    output_parser=numeric_score_eval,
    include_prompt=True,
    include_response=True,
)

# Make sure the result index is labelled "context.span_id" before logging.
# NOTE(review): presumably SpanEvaluations keys rows by this index name; only
# the *name* is set here, so confirm the index values are real span IDs.
span_id_label = "context.span_id"
if span_id_label not in test_results.index.names:
    test_results.index.name = span_id_label

print(test_results)

# Log the scores to Phoenix as span-level evaluations.
# NOTE(review): the eval is logged under the name "Relevance" although the
# template scores grammar/spelling errors — confirm this name is intended.
phoenix_client = px.Client()
span_evaluations = SpanEvaluations(eval_name="Relevance", dataframe=test_results)
phoenix_client.log_evaluations(span_evaluations)
Author
goofansu
commented
Jan 17, 2025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment