izikeros/ragas_azureopenai.py

## ragas_azureopenai.py
"""
Requires the following environment variables (the list below can be used as a .env_template):

COMPLETION_DEPLOYMENT_NAME=
EMBEDDING_DEPLOYMENT_NAME=
AZURE_OPENAI_API_KEY=
AZURE_OPENAI_ENDPOINT=
AZURE_OPENAI_API_VERSION=
"""

import os
from pprint import pprint

import langchain
import langchain_community
import openai
import ragas
from datasets import Dataset
from dotenv import find_dotenv, load_dotenv
from langchain_openai.chat_models import AzureChatOpenAI
from langchain_openai.embeddings import AzureOpenAIEmbeddings
from ragas import evaluate
from ragas.evaluation import Result
from ragas.metrics import (
    answer_relevancy,
    context_precision,
    context_recall,
    faithfulness,
)

# Report the versions of the libraries this example depends on. Each label is
# printed verbatim, followed by the package's ``__version__`` attribute.
print("\n===========\nKey packages versions:\n===========\n")
for _label, _package in (
    ("ragas version: ", ragas),
    ("openai version: ", openai),
    ("langchain version: ", langchain),
    ("langchain-community version: ", langchain_community),
):
    print(_label, _package.__version__)


# Load from huggingface dataset repository
# amnesty_qa = load_dataset(
#     "explodinggradients/amnesty_qa", "english_v2", trust_remote_code=True
# )

# Two hand-written evaluation samples, written one record per question. Each
# record holds the question, the answer, the contexts and the ground truth.
_sample_records = [
    {
        "question": "When was the first super bowl?",
        "answer": "The first superbowl was held on January 15, 1967",
        "contexts": [
            "The Super Bowl....season since 1966,",
            "replacing the NFL...in February.",
        ],
        "ground_truth": "The first superbowl was held on January 15, 1967",
    },
    {
        "question": "Who won the most super bowls?",
        "answer": "The most super bowls have been won by The New England Patriots",
        "contexts": [
            "The Green Bay Packers...Green Bay, Wisconsin.",
            "The Packers compete...Football Conference",
        ],
        "ground_truth": "The New England Patriots have won the Super Bowl a record six times",
    },
]

# Pivot the records into a column-oriented mapping: one list per field, with
# the i-th entry of every list belonging to the i-th sample.
data_samples = {
    field: [record[field] for record in _sample_records]
    for field in ("question", "answer", "contexts", "ground_truth")
}
# Wrap the column-oriented samples in a `datasets.Dataset`; this object is
# what gets passed to `evaluate` further down.
dataset = Dataset.from_dict(data_samples)


# Metrics handed to `evaluate` for this run.
metrics = [faithfulness, answer_relevancy, context_recall, context_precision]

print("\n===========\nEnvironment variables:\n===========\n")
# find_dotenv() returns an empty string when it cannot locate a .env file.
# Only report a file as loaded when one was actually found; otherwise say so
# instead of printing a message with an empty path.
env = find_dotenv()
if env:
    load_dotenv(env)
    print(f"Loaded environment variables from {env}")
else:
    print("No .env file found; using variables already set in the environment")

# Azure OpenAI settings read explicitly by this script. os.getenv() yields
# None for any variable that is not set.
# NOTE(review): AZURE_OPENAI_API_KEY and AZURE_OPENAI_ENDPOINT are not read
# here — presumably the langchain Azure clients pick them up from the
# environment themselves; confirm against the langchain-openai docs.
azure_config = {
    "api_version": os.getenv("AZURE_OPENAI_API_VERSION"),
    "azure_completion_deployment": os.getenv("COMPLETION_DEPLOYMENT_NAME"),
    "azure_embedding_deployment": os.getenv("EMBEDDING_DEPLOYMENT_NAME"),
}

# Echo the resolved settings (the API key is not part of this dict).
pprint(azure_config)

# ===== Models =====
# Both Azure clients use the same API version; only the deployment differs.
_api_version = azure_config["api_version"]

# Chat model passed to `evaluate` as `llm`.
azure_model = AzureChatOpenAI(
    azure_deployment=azure_config["azure_completion_deployment"],
    api_version=_api_version,
)

# Embedding model passed to `evaluate` as `embeddings`.
azure_embeddings = AzureOpenAIEmbeddings(
    azure_deployment=azure_config["azure_embedding_deployment"],
    api_version=_api_version,
)

# select single random example from the dataset
#  dataset = amnesty_qa["eval"].shuffle(seed=42).select(range(1))

# Score `dataset` with the selected metrics, using the Azure chat model and
# embeddings configured above.
result: Result = evaluate(
    dataset,
    metrics=metrics,
    llm=azure_model,
    embeddings=azure_embeddings,
    is_async=False,
    raise_exceptions=True,
)

# Show the result twice: its printed form, then its raw attribute dictionary.
print("\n===========\nEvaluation result:\n===========\n")
print(result)
print("\n-----------\nResult object\n-----------\n")
pprint(result.__dict__)

# Tabular view of the same result: first rows, then the column index.
print("\n===========\nResult as pandas dataframe:\n===========\n")
df = result.to_pandas()
print(df.head())
print("\n---------Columns---------\n")
print(df.columns)

# ======== save the dataframe to json files ========
# Make sure the output directory exists; an existing one is left untouched.
os.makedirs("results", exist_ok=True)
# Two layouts of the same data:
#   orient="records" -> list of dictionaries, one per question
#   orient="columns" -> dictionary of dictionaries, one per column/metric
for _json_path, _orient in (
    ("results/result_df_records.json", "records"),
    ("results/result_df_columns.json", "columns"),
):
    df.to_json(_json_path, orient=_orient, indent=2)
	"""
	Requires the following environment variables (the list below can be used as a .env_template):

	COMPLETION_DEPLOYMENT_NAME=
	EMBEDDING_DEPLOYMENT_NAME=
	AZURE_OPENAI_API_KEY=
	AZURE_OPENAI_ENDPOINT=
	AZURE_OPENAI_API_VERSION=
	"""

	import os
	from pprint import pprint

	import langchain
	import langchain_community
	import openai
	import ragas
	from datasets import Dataset
	from dotenv import find_dotenv, load_dotenv
	from langchain_openai.chat_models import AzureChatOpenAI
	from langchain_openai.embeddings import AzureOpenAIEmbeddings
	from ragas import evaluate
	from ragas.evaluation import Result
	from ragas.metrics import (
	answer_relevancy,
	context_precision,
	context_recall,
	faithfulness,
	)

	# Report the versions of the libraries this example depends on. Each label is
	# printed verbatim, followed by the package's ``__version__`` attribute.
	print("\n===========\nKey packages versions:\n===========\n")
	for _label, _package in (
	    ("ragas version: ", ragas),
	    ("openai version: ", openai),
	    ("langchain version: ", langchain),
	    ("langchain-community version: ", langchain_community),
	):
	    print(_label, _package.__version__)


	# Load from huggingface dataset repository
	# amnesty_qa = load_dataset(
	# "explodinggradients/amnesty_qa", "english_v2", trust_remote_code=True
	# )

	# Two hand-written evaluation samples, written one record per question. Each
	# record holds the question, the answer, the contexts and the ground truth.
	_sample_records = [
	    {
	        "question": "When was the first super bowl?",
	        "answer": "The first superbowl was held on January 15, 1967",
	        "contexts": [
	            "The Super Bowl....season since 1966,",
	            "replacing the NFL...in February.",
	        ],
	        "ground_truth": "The first superbowl was held on January 15, 1967",
	    },
	    {
	        "question": "Who won the most super bowls?",
	        "answer": "The most super bowls have been won by The New England Patriots",
	        "contexts": [
	            "The Green Bay Packers...Green Bay, Wisconsin.",
	            "The Packers compete...Football Conference",
	        ],
	        "ground_truth": "The New England Patriots have won the Super Bowl a record six times",
	    },
	]

	# Pivot the records into a column-oriented mapping: one list per field, with
	# the i-th entry of every list belonging to the i-th sample.
	data_samples = {
	    field: [record[field] for record in _sample_records]
	    for field in ("question", "answer", "contexts", "ground_truth")
	}
	# Wrap the column-oriented samples in a `datasets.Dataset`; this object is
	# what gets passed to `evaluate` further down.
	dataset = Dataset.from_dict(data_samples)


	# Metrics handed to `evaluate` for this run.
	metrics = [faithfulness, answer_relevancy, context_recall, context_precision]

	print("\n===========\nEnvironment variables:\n===========\n")
	# find_dotenv() returns an empty string when it cannot locate a .env file.
	# Only report a file as loaded when one was actually found; otherwise say so
	# instead of printing a message with an empty path.
	env = find_dotenv()
	if env:
	    load_dotenv(env)
	    print(f"Loaded environment variables from {env}")
	else:
	    print("No .env file found; using variables already set in the environment")

	# Azure OpenAI settings read explicitly by this script. os.getenv() yields
	# None for any variable that is not set.
	# NOTE(review): AZURE_OPENAI_API_KEY and AZURE_OPENAI_ENDPOINT are not read
	# here — presumably the langchain Azure clients pick them up from the
	# environment themselves; confirm against the langchain-openai docs.
	azure_config = {
	    "api_version": os.getenv("AZURE_OPENAI_API_VERSION"),
	    "azure_completion_deployment": os.getenv("COMPLETION_DEPLOYMENT_NAME"),
	    "azure_embedding_deployment": os.getenv("EMBEDDING_DEPLOYMENT_NAME"),
	}

	# Echo the resolved settings (the API key is not part of this dict).
	pprint(azure_config)

	# ===== Models =====
	# Both Azure clients use the same API version; only the deployment differs.
	_api_version = azure_config["api_version"]

	# Chat model passed to `evaluate` as `llm`.
	azure_model = AzureChatOpenAI(
	    azure_deployment=azure_config["azure_completion_deployment"],
	    api_version=_api_version,
	)

	# Embedding model passed to `evaluate` as `embeddings`.
	azure_embeddings = AzureOpenAIEmbeddings(
	    azure_deployment=azure_config["azure_embedding_deployment"],
	    api_version=_api_version,
	)

	# select single random example from the dataset
	# dataset = amnesty_qa["eval"].shuffle(seed=42).select(range(1))

	# Score `dataset` with the selected metrics, using the Azure chat model and
	# embeddings configured above.
	result: Result = evaluate(
	    dataset,
	    metrics=metrics,
	    llm=azure_model,
	    embeddings=azure_embeddings,
	    is_async=False,
	    raise_exceptions=True,
	)

	# Show the result twice: its printed form, then its raw attribute dictionary.
	print("\n===========\nEvaluation result:\n===========\n")
	print(result)
	print("\n-----------\nResult object\n-----------\n")
	pprint(result.__dict__)

	# Tabular view of the same result: first rows, then the column index.
	print("\n===========\nResult as pandas dataframe:\n===========\n")
	df = result.to_pandas()
	print(df.head())
	print("\n---------Columns---------\n")
	print(df.columns)

	# ======== save the dataframe to json files ========
	# Make sure the output directory exists; an existing one is left untouched.
	os.makedirs("results", exist_ok=True)
	# Two layouts of the same data:
	#   orient="records" -> list of dictionaries, one per question
	#   orient="columns" -> dictionary of dictionaries, one per column/metric
	for _json_path, _orient in (
	    ("results/result_df_records.json", "records"),
	    ("results/result_df_columns.json", "columns"),
	):
	    df.to_json(_json_path, orient=_orient, indent=2)