Skip to content

Instantly share code, notes, and snippets.

@Lord-V15
Created April 16, 2021 17:23
Show Gist options
  • Save Lord-V15/c0b97d7546f271d6e5f86125323dd917 to your computer and use it in GitHub Desktop.
Save Lord-V15/c0b97d7546f271d6e5f86125323dd917 to your computer and use it in GitHub Desktop.
Settings and search calls in Elasticsearch for vector similarity search
# Idea : use this to index Quora's Question Pair dataset and explore the search results
# Index definition: k-NN is switched on for the index, "title" is a text field
# and "title_vector" is a 128-dimensional knn_vector field.
doc = {
    "settings": {"index.knn": True},
    "mappings": {
        "properties": {
            "title": {"type": "text"},
            "title_vector": {"type": "knn_vector", "dimension": 128},
        },
    },
}

# Create the "questions" index from the definition above. ignore=400 asks the
# client not to raise on an HTTP 400 response (presumably so that re-running
# against an already-existing index is harmless -- confirm against the client docs).
es.indices.create(index="questions", body=doc, ignore=400)
def search_knn(title, *, k=2, size=10):
    """Run a k-NN vector search for *title* against the "questions" index.

    The title is embedded with ``embed`` and the resulting vector is matched
    against the ``title_vector`` field.

    Args:
        title: Question text to search for.
        k: Number of nearest neighbours requested in the ``knn`` clause.
            NOTE(review): in the k-NN plugin this is presumably applied per
            shard/segment, so more than ``k`` hits can come back overall, up
            to ``size`` -- confirm against the plugin documentation.
        size: Upper bound on the number of hits in the response.

    Returns:
        The raw response object returned by ``es.search``.
    """
    # embed() takes a batch of sentences; we pass a single-item batch and
    # keep only its first (and only) row as a plain Python list.
    # Presumably 128 floats, matching the mapping's dimension -- verify.
    query_vector = np.asarray(embed([title])).tolist()[0]

    knn_query = {
        "knn": {
            "title_vector": {
                "vector": query_vector,
                "k": k,
            },
        },
    }

    return es.search(
        index="questions",
        body={
            "size": size,
            "query": knn_query,
            # Only return the "title" field of each hit; this restricts the
            # returned _source, it does not filter which documents match.
            "_source": {"includes": ["title"]},
        },
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment