Skip to content

Instantly share code, notes, and snippets.

@janakiramm
Created August 3, 2023 06:57
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save janakiramm/1325a5c5f792f80c1db6a9bae40f8a37 to your computer and use it in GitHub Desktop.
Save janakiramm/1325a5c5f792f80c1db6a9bae40f8a37 to your computer and use it in GitHub Desktop.
ChromaDB for RAG with OpenAI
import pandas as pd
import openai
import chromadb
from chromadb.utils import embedding_functions
import os
# Load the Oscar nominations table and keep only the rows for the 2023
# ceremony that name a film. The explicit `.copy()` makes the filtered frame
# independent of the frame returned by `read_csv`, so the column assignments
# below operate on a real frame rather than on a slice (assigning to a slice
# is what makes pandas emit SettingWithCopyWarning).
df = pd.read_csv('./data/oscars.csv')
df = df.loc[df['year_ceremony'] == 2023].dropna(subset=['film']).copy()

# Lower-case the category so the generated sentences read uniformly.
df['category'] = df['category'].str.lower()

# Build one sentence per nomination. Both variants share the same opening,
# so it is assembled once and only the ending differs by outcome.
_nomination = (
    df['name']
    + ' got nominated under the category, '
    + df['category']
    + ', for the film '
    + df['film']
)
df['text'] = _nomination + ' to win the award'
# Element-wise comparison on purpose: selects exactly the rows whose `winner`
# value equals False and overwrites their sentence.
df.loc[df['winner'] == False, 'text'] = _nomination + ' but did not win'  # noqa: E712
def text_embedding(text) -> list:
    """Return the OpenAI embedding for ``text``.

    Sends ``text`` to ``openai.Embedding.create`` using the
    ``text-embedding-ada-002`` model and returns the ``"embedding"`` entry of
    the first item in the response's ``"data"`` list.
    """
    response = openai.Embedding.create(model="text-embedding-ada-002", input=text)
    return response["data"][0]["embedding"]
# OpenAI-backed embedding function handed to the collection below; the API
# key is read from the OPENAI_API_KEY environment variable.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    model_name="text-embedding-ada-002",
    api_key=os.environ["OPENAI_API_KEY"],
)

# Fetch the "oscars-2023" collection, creating it if it does not exist yet.
client = chromadb.Client()
collection = client.get_or_create_collection(
    "oscars-2023",
    embedding_function=openai_ef,
)

# Store every generated sentence, keyed by its DataFrame index rendered as a
# string.
docs = df["text"].tolist()
ids = list(map(str, df.index.tolist()))
collection.add(ids=ids, documents=docs)
# Embed the search phrase and ask the collection for up to 15 matching
# documents (only the document text is requested back).
vector = text_embedding("Nominations for music")
results = collection.query(
    query_embeddings=vector,
    include=["documents"],
    n_results=15,
)

# Join the documents returned for the first query into one newline-separated
# block and wrap it in backticks ahead of the question.
res = "\n".join(map(str, results['documents'][0]))
prompt = f'```{res}```who won the award for the original song'

# Ask the chat model to answer from the retrieved context.
messages = [
    {"role": "system", "content": "You answer questions about 95th Oscar awards."},
    {"role": "user", "content": prompt},
]
response = openai.ChatCompletion.create(
    messages=messages,
    model="gpt-3.5-turbo",
    temperature=0,
)

# Print the text of the first returned choice.
response_message = response["choices"][0]["message"]["content"]
print(response_message)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment