@tedsecretsource
Created April 2, 2024 08:49
Set up and run LangSmith tests
import os
import logging
from operator import itemgetter

from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())  # read local .env file

from langchain import hub
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnableMap
from langsmith import Client

logging.basicConfig(level=logging.DEBUG)
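
# Environment variables used below (all loaded from the local .env file):
#   SARASWATI_LLM                  - OpenAI chat model name for the chain under test
#   SARASWATI_SYSTEM_PROMPT        - optional LangChain Hub repo to pull the system prompt from
#   SARASWATI_COLLECTION_ID        - pgvector collection that holds the project documents
#   SARASWATI_SIMILARITY_SEARCH_K  - number of documents to retrieve per question
#   OPENAI_API_KEY, DATABASE_URL   - OpenAI credentials and the Postgres connection string
# The langsmith Client additionally expects a LangSmith API key in the environment
# (typically LANGCHAIN_API_KEY or LANGSMITH_API_KEY).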

# Since chains and agents can be stateful (they can have memory),
# create a constructor to pass in to the run_on_dataset method.
def create_runnable():
    llm = ChatOpenAI(model_name=os.environ["SARASWATI_LLM"], temperature=0)
    prompt = get_system_prompt()
    # Map the "context" and "question" fields of each dataset example onto the prompt variables.
    return RunnableMap({"context": itemgetter("context"), "question": itemgetter("question")}) | prompt | llm
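
# For reference, run_on_dataset calls this factory once per dataset example and invokes the
# resulting runnable with that example's inputs, roughly equivalent to:
#     create_runnable().invoke({"question": "...", "context": "..."})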

def get_system_prompt():
    system_prompt_repo = os.getenv("SARASWATI_SYSTEM_PROMPT")
    if system_prompt_repo is not None:
        return hub.pull(system_prompt_repo)
    # Fallback prompt as a ChatPromptTemplate so it can be piped into the chain
    # (assumes the retrieved context follows the separator and the question is the human turn).
    return ChatPromptTemplate.from_messages([
        ("system",
         "Use the following pieces of context to answer the user's question.\n"
         "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
         "----------------\n{context}"),
        ("human", "{question}"),
    ])

def _doc_exists(doc_source, documents):
    """Check if a document exists in a list of documents based on its source."""
    return any(doc.metadata['source'] == doc_source for doc in documents)

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.pgvector import PGVector


def get_question_context(question: str):
    """Build a context string for a question from the most similar documents in pgvector."""
    doc_list = []
    context = ""
    db = PGVector(
        embedding_function=OpenAIEmbeddings(openai_api_key=os.environ["OPENAI_API_KEY"]),
        collection_name=os.environ["SARASWATI_COLLECTION_ID"],
        connection_string=os.environ["DATABASE_URL"]
    )
    search_results = db.similarity_search(question, k=int(os.environ['SARASWATI_SIMILARITY_SEARCH_K']))
    for doc in search_results:
        logging.debug("Source: %s", doc.metadata['source'])
        # Only include each source document once in the assembled context.
        if not _doc_exists(doc.metadata['source'], doc_list):
            doc_list.append(doc)
            context += doc.page_content
    logging.debug("Context: %s", context)
    return context
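
# Optional sanity check (not part of the test run): uncomment to confirm retrieval works
# against the pgvector collection before any dataset examples are created.
# logging.debug(get_question_context("What are the main features of the Blue Card Renewal project?")[:500])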

from langchain.smith import RunEvalConfig, run_on_dataset

example_inputs = [
    "What are the main features of the Blue Card Renewal project?",
    "Write a sufficient number of user stories to fully cover the feature identified as “Supporting online evidence provision” for the Blue Card Renewals project.",
    "Write sufficient acceptance criteria to cover the following user story for the Blue Card Renewal project: I, as a learner, while on the attach evidence screen, can see the names of the files as they appear on my device after they have been uploaded so I can tell which files I've uploaded and which remain to be uploaded.",
    "Given the following acceptance criteria, write one or more tests in code to fulfill the criteria: Given that I have uploaded multiple files, when I view the attach evidence screen, then I should see the names of all the uploaded files displayed in a list above the upload button.",
    "Write a development plan for the feature identified as “Supporting online evidence provision” for the Blue Card Renewals project.",
]

client = Client()
dataset_name = "Augmented Developer Dataset"

# Get a generator of all datasets with the given name
datasets = client.list_datasets(dataset_name=dataset_name)
logging.debug("type: %s", type(datasets))
logging.debug("Datasets: %s", datasets)

dataset = None
while True:
    try:
        dataset = next(datasets)
        logging.debug("Dataset: %s", dataset)
    except StopIteration:
        logging.debug("No more datasets")
        break

# If no dataset exists yet, create one and populate it with the example inputs.
if dataset is None:
    dataset = client.create_dataset(
        dataset_name=dataset_name, description="Augmented Developer prompts",
    )
    for input_prompt in example_inputs:
        # Each example must be unique and have inputs defined.
        # Outputs are optional.
        client.create_example(
            inputs={"question": input_prompt, "context": get_question_context(input_prompt)},
            outputs=None,
            dataset_id=dataset.id,
        )

eval_config = RunEvalConfig(
    evaluators=[
        # You can specify an evaluator by name/enum.
        # In this case, the default criterion is "helpfulness"
        "criteria",
        # Or you can configure the evaluator
        RunEvalConfig.Criteria("harmfulness"),
        # RunEvalConfig.Criteria(
        #     {"cliche": "Are the lyrics cliche?"
        #      " Respond Y if they are, N if they're entirely unique."}
        # )
    ],
    input_key="question",
)
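
# Note: criteria evaluators are themselves LLM-graded, so an OpenAI key (OPENAI_API_KEY)
# is assumed to be available for the grading model as well as for the chain under test.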

run_on_dataset(
    client=client,
    dataset_name=dataset_name,
    llm_or_chain_factory=create_runnable,
    evaluation=eval_config,
    verbose=True,
)
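
# run_on_dataset logs a link to the resulting test project in LangSmith and returns a dict of
# results; per-example runs and evaluator feedback can be inspected in the LangSmith UI.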