Skip to content

Instantly share code, notes, and snippets.

@hweller1
hweller1 / hybrid_search_example.py
Last active March 1, 2024 00:58
Basic example showing how to use reciprocal rank fusion to join the results of a vector search and a full-text search on indexes built from the same collection, using the same query
import os
import pymongo
os.environ["OPENAI_API_KEY"] = '<openai API key>'
import openai
### SETUP
@hweller1
hweller1 / parent_document_retriever.py
Last active October 13, 2023 17:37
Simple example showing how to search against sentences and retrieve a related page that it belongs to as context
import os
import pymongo
import time
import openai
# Request an embedding for the sample question from the OpenAI API using the
# "text-embedding-ada-002" model; the API response is stored in `embeddings`.
embeddings = openai.Embedding.create(
input="What is a transformer?",
model="text-embedding-ada-002"
)
@hweller1
hweller1 / generate_exact_result_set.py
Created October 4, 2023 21:55
Generate exact vector search result set using dot product similarity between a set of query vectors and a set of stored vectors
import numpy as np
from sentence_transformers import SentenceTransformer
queries = [f"What is {x}" for x in names]
encoded_queries = {}
model = SentenceTransformer('sentence-transformers/facebook-dpr-question_encoder-single-nq-base')
results = {}
top_result = []
for i, query in enumerate(queries):
print(f"Calculating similarity for query {i}")
names = ["Qualcomm",
"Hewlett Packard Enterprise",
"British American Tobacco",
"Visa",
"China Pacific Insurance",
"MetLife",
"AstraZeneca",
"Altria Group",
"SAP",
"Costco Wholesale",
@hweller1
hweller1 / rsf_via_union_with.py
Last active November 15, 2023 16:27
relative score fusion using unionWith and group, using the scores yielded from the searchMeta
import pymongo
import time
from sentence_transformers import SentenceTransformer
from companies import names # List of company names from another python file
### DESCRIPTION
"""
@hweller1
hweller1 / rrf_via_unionWith.py
Last active February 23, 2024 09:40
Perform reciprocal rank fusion using $push to expose rank, and $unionWith and $group to join result sets of vector search and full text search
import pymongo
import time
from sentence_transformers import SentenceTransformer
from companies import names # list of company names in a separate python file
### DESCRIPTION
"""
Search against the Sphere dataset using vector search results fused with full text search results via reciprocal rank fusion.
def compute_overlap(exact_result_set: List, approx_result_set: List) -> float:
    """Return the fraction of exact results that also appear in the approximate results.

    Each result set is a collection of hashable items (e.g. URLs). Order and
    duplicates are ignored: both inputs are treated as sets.

    Args:
        exact_result_set: Ground-truth results (e.g. from an exact search).
        approx_result_set: Results to evaluate against the ground truth.

    Returns:
        ``|exact ∩ approx| / |exact|`` as a float in ``[0.0, 1.0]``. Returns
        ``0.0`` when ``exact_result_set`` is empty, since the ratio is
        undefined there and raising ZeroDivisionError is unhelpful to callers.
    """
    # Convert explicitly: the parameter is a list, which has no .intersection().
    exact = set(exact_result_set)
    if not exact:
        return 0.0
    # set.intersection accepts any iterable, so no extra conversion is needed.
    return len(exact.intersection(approx_result_set)) / len(exact)
$search": {
"index":'1M_sphere_index',
"knnBeta": {
"path": "vector",
"vector": embedding.tolist(),
"k": k * multiplier,
# "filter":{
# "equals":{
# "path":"low_card",
# "value":1
@hweller1
hweller1 / sample_index_definition.py
Last active December 1, 2023 20:06
new index definition
{
"type": "vectorSearch",
"fields": [{
"path": "plot_embedding",
"dimensions": 1536,
"similarity": "cosine",
"type": "vector"
}]
}
{
"type": "vectorSearch",
"fields": [{
"path": "plot_embedding_hf",
"dimensions": 384,
"similarity": "dotProduct",
"type": "vector"
}]
}