Skip to content

Instantly share code, notes, and snippets.

@hweller1
Last active October 13, 2023 17:37
Show Gist options
  • Save hweller1/4394acb098763320533411eea25c1643 to your computer and use it in GitHub Desktop.
Save hweller1/4394acb098763320533411eea25c1643 to your computer and use it in GitHub Desktop.
Simple example showing how to run a vector search against sentence-level documents and retrieve the page each sentence belongs to as context
import os
import pymongo
import time
import openai
# Example: vector-search sentence-level documents, then join each hit to the
# page-level document it belongs to so the page text can be used as context.

# Embed the query text; the resulting vector is used as the search query.
embeddings = openai.Embedding.create(
    input="What is a transformer?",
    model="text-embedding-ada-002",
)
embeddings = embeddings.data[0].embedding

client = pymongo.MongoClient("")  # mongodb cluster URI
db = client["vector-test"]
coll = db["nested_test"]
times = []  # not used by this snippet; kept so the module-level name still exists

vector_agg_with_lookup = [
    {
        "$vectorSearch": {
            "index": "vector_index",
            "path": "vector",
            "queryVector": embeddings,
            "limit": 10,
            "numCandidates": 50,
            # MQL match expression: <field>: {<operator>: <value>}.
            # NOTE(review): "doc_level" presumably has to be declared as a
            # filter field in the "vector_index" definition -- confirm.
            "filter": {"doc_level": {"$eq": "sentence"}},
        },
    },
    {
        "$project": {"text": 1, "page": 1, "doc_level": 1},
    },
    {
        # Self-join on the same collection: attach the page-level document
        # that shares the matched sentence's "page" value.
        "$lookup": {
            "from": "nested_test",
            "localField": "page",
            "foreignField": "page",
            "as": "parent_context",
            "pipeline": [{"$match": {"doc_level": "page"}}],
        },
    },
    # "parent_context" only exists on the output of $lookup, so it must be
    # unwound here rather than inside the $lookup sub-pipeline. Sentences
    # with no matching page document are dropped by this stage.
    {"$unwind": "$parent_context"},
]

x = coll.aggregate(vector_agg_with_lookup)

# Each result is a sentence document whose "parent_context" is the single
# page document it was joined to.
for doc in x:
    parent_context = doc["parent_context"]["text"]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment