Skip to content

Instantly share code, notes, and snippets.

@berggren
Last active March 5, 2024 19:49
Show Gist options
  • Save berggren/6d0a41fc5b1329dff30868d6cdb5e1d8 to your computer and use it in GitHub Desktop.
Save berggren/6d0a41fc5b1329dff30868d6cdb5e1d8 to your computer and use it in GitHub Desktop.
import chromadb
class VectorStore:
    """Thin wrapper around a single persistent Chroma collection.

    The wrapper keeps a ``chromadb.PersistentClient`` plus one collection
    (looked up or created by name) and offers helpers to embed documents
    into it and to run text queries against it.
    """

    def __init__(self, collection_name, path="/tmp/embeddings.vector") -> None:
        """Open (or create) the collection ``collection_name``.

        Args:
            collection_name: Name of the Chroma collection to use.
            path: Location handed to ``chromadb.PersistentClient``. The
                default matches the previously hard-coded location.
        """
        self.client = chromadb.PersistentClient(path=path)
        self.collection_name = collection_name
        self.collection = self.client.get_or_create_collection(collection_name)

    def embed_documents(
        self, docs: list, metadatas: list, ids: list, refresh: bool = False
    ) -> "chromadb.Collection":
        """Upsert ``docs`` into the collection and return the collection.

        Args:
            docs: Document texts to embed.
            metadatas: Metadata entries passed alongside ``docs``.
            ids: Identifiers passed alongside ``docs``.
            refresh: When true, the collection is deleted and recreated
                before the documents are upserted.

        Returns:
            The collection currently held by this store, both when new
            documents were upserted and when the upsert was skipped.
        """
        if refresh:
            print("Refreshing collection")
            self.client.delete_collection(name=self.collection_name)
            self.collection = self.client.get_or_create_collection(
                self.collection_name
            )
        elif len(ids) == self.collection.count():
            # NOTE: this shortcut compares counts only; it does not verify
            # that the stored ids or contents match the ones passed in.
            # Pass refresh=True to force re-embedding.
            print("Documents already embedded")
            return self.collection

        print(f"Embedding {len(ids)} documents..")
        self.collection.upsert(documents=docs, metadatas=metadatas, ids=ids)
        return self.collection

    def query_collection(self, query, num_results=10):
        """Query the collection with a single query text.

        Args:
            query: Query text; sent to the collection as a one-element
                ``query_texts`` list.
            num_results: Value forwarded as ``n_results``.

        Returns:
            Whatever ``collection.query`` returns, unmodified.
        """
        return self.collection.query(
            query_texts=[query],
            n_results=num_results,
        )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment