Skip to content

Instantly share code, notes, and snippets.

@joshreini1
Last active July 20, 2023 12:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save joshreini1/0f9d6cdae872cf2933075b0bac59484c to your computer and use it in GitHub Desktop.
Save joshreini1/0f9d6cdae872cf2933075b0bac59484c to your computer and use it in GitHub Desktop.
Evaluate Pinecone Configurations with TruLens
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
# Completion LLM: gpt-3.5-turbo with temperature 0.0.
llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0.0)

# "stuff"-type RetrievalQA chain that retrieves through the vector store's
# default retriever. `vectorstore` is not created in this snippet and must
# already exist when this runs.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
)
# Start the TruLens dashboard from the `tru` object. `tru` is not created in
# this snippet; it must already exist (it is built elsewhere as `Tru()`).
tru.run_dashboard()
# Imports main tools for eval
from trulens_eval import TruChain, Feedback, Tru, feedback, Select
import numpy as np
# TruLens workspace object.
tru = Tru()

# OpenAI as the feedback provider.
openai = feedback.OpenAI()

# Question/answer relevance between the overall question and the answer.
# on_input_output() evaluates the feedback on the main app input and the
# main app output.
qa_relevance = Feedback(openai.relevance).on_input_output()
# Question/statement relevance between the question and each context chunk.
# The whole expression is wrapped in parentheses so the method chain can be
# split across lines (a bare line break after `=` or before `.` is a
# SyntaxError). The selector addresses the page_content of every document in
# the `input_documents` argument of the combine-documents chain call, and
# np.mean aggregates the resulting per-chunk scores into one value.
qs_relevance = (
    Feedback(openai.qs_relevance)
    .on_input()
    .on(Select.Record.app.combine_documents_chain._call.args.inputs.input_documents[:].page_content)
    .aggregate(np.mean)
)
# Wrap the QA chain with TruLens under app id 'Chain1_WikipediaQA',
# attaching both feedback functions.
truchain = TruChain(
    qa,
    app_id='Chain1_WikipediaQA',
    feedbacks=[qa_relevance, qs_relevance],
)
# Create the first Pinecone index. `index_name_v1` is not defined in this
# snippet and must already exist when this runs.
pinecone.create_index(
    name=index_name_v1,
    metric='cosine',  # we'll try each distance metric here
    dimension=1536,  # 1536 dim of text-embedding-ada-002
    # Pass an empty list for the names of indexed metadata fields.
    # The key must use plain ASCII quotes; typographic quotes do not parse.
    metadata_config={"indexed": []},
)
from langchain.llms import OpenAI

# Completion LLM: text-ada-001 with temperature 0.
llm = OpenAI(model_name='text-ada-001', temperature=0)
from langchain.chains import RetrievalQAWithSourcesChain
# "stuff"-type retrieval chain over the vector store's default retriever.
# NOTE(review): this builds a plain RetrievalQA chain even though the
# variable is named `qa_with_sources` and RetrievalQAWithSourcesChain is the
# class imported just above -- confirm which chain class is intended.
qa_with_sources = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
)
# Wrap the chain with TruLens under app id 'Chain4_WikipediaQA',
# attaching both feedback functions.
truchain = TruChain(
    qa_with_sources,
    app_id='Chain4_WikipediaQA',
    feedbacks=[qa_relevance, qs_relevance],
)
# Second index: same 1536 dimensions, euclidean distance metric.
index_name_v2 = 'langchain-rag-euclidean'

pinecone.create_index(
    name=index_name_v2,
    metric='euclidean',  # alternative: metric='dotproduct'
    dimension=1536,  # 1536 dim of text-embedding-ada-002
)
# Embedding client for text-embedding-ada-002. Both OpenAIEmbeddings and
# OPENAI_API_KEY come from outside this snippet.
embed = OpenAIEmbeddings(model='text-embedding-ada-002', openai_api_key=OPENAI_API_KEY)
from langchain.vectorstores import Pinecone
# Passed to the Pinecone wrapper as its third positional argument.
text_field = "text"

# switch back to normal index for langchain
index = pinecone.Index(index_name_v1)

# LangChain vector store built from the index handle, the query-embedding
# function and the text field name.
vectorstore = Pinecone(index, embed.embed_query, text_field)
import pinecone_datasets
# Load the pre-embedded dataset and preview its first rows.
dataset = pinecone_datasets.load_dataset('wikipedia-simple-text-embedding-ada-002-100K')
dataset.head()

# Upsert the documents into `index` in batches of 100. The loop body must be
# indented; an unindented body is an IndentationError.
for batch in dataset.iter_documents(batch_size=100):
    index.upsert(batch)
# "stuff"-type RetrievalQA chain whose retriever returns only the top result.
# The result count is set through `search_kwargs={"k": 1}`; `top_k` is not a
# retriever option, so passing it does not limit retrieval to one document.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(search_kwargs={"k": 1}),
)
# Question/statement relevance restricted to the first retrieved document:
# the selector slices `input_documents[:1]` before taking page_content, and
# np.mean aggregates the resulting scores.
qs_relevance = (
    Feedback(openai.qs_relevance)
    .on_input()
    .on(Select.Record.app.combine_documents_chain._call.args.inputs.input_documents[:1].page_content)
    .aggregate(np.mean)
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment