Skip to content

Instantly share code, notes, and snippets.

@xeraa
Last active October 12, 2023 10:00
Show Gist options
  • Save xeraa/c24b684b94812b330b2066f902a6b883 to your computer and use it in GitHub Desktop.
Save xeraa/c24b684b94812b330b2066f902a6b883 to your computer and use it in GitHub Desktop.
Reciprocal Rank Fusion
// Full-text search only: a multi_match query over the "title" field.
// The response is limited to 6 hits and carries just "title" (via the
// fields option) because _source is switched off.
GET bytes-discuss-dense-search/_search
{
  "_source": false,
  "fields": ["title"],
  "size": 6,
  "query": {
    "multi_match": {
      "query": "I don't know how to use ingest pipelines",
      "fields": ["title"]
    }
  }
}
// Vector (kNN) search only: the question is turned into an embedding at
// search time by the text_embedding model named below and matched against
// the "title_vector.predicted_value" field.
// k is the number of nearest neighbours requested; num_candidates is the
// size of the candidate pool examined per shard.
GET bytes-discuss-dense-search/_search
{
  "_source": false,
  "fields": ["title"],
  "size": 6,
  "knn": {
    "field": "title_vector.predicted_value",
    "query_vector_builder": {
      "text_embedding": {
        "model_id": "sentence-transformers__all-mpnet-base-v2",
        "model_text": "I don't know how to use ingest pipelines"
      }
    },
    "k": 26,
    "num_candidates": 2600
  }
}
// Sparse retrieval only: a text_expansion query against the "ml.tokens"
// field, with the question expanded by the ".elser_model_1" model.
// Runs against the sparse index; only the "title" of the top 6 hits is returned.
GET bytes-discuss-sparse-search/_search
{
  "_source": false,
  "fields": ["title"],
  "size": 6,
  "query": {
    "text_expansion": {
      "ml.tokens": {
        "model_id": ".elser_model_1",
        "model_text": "I don't know how to use ingest pipelines"
      }
    }
  }
}
// Hybrid search: a knn section and a multi_match query in one request,
// neither of them boosted.
// score = knn_score + match_score
GET bytes-discuss-dense-search/_search
{
  "_source": false,
  "fields": ["title"],
  "size": 6,
  "query": {
    "multi_match": {
      "query": "I don't know how to use ingest pipelines",
      "fields": ["title"]
    }
  },
  "knn": {
    "field": "title_vector.predicted_value",
    "query_vector_builder": {
      "text_embedding": {
        "model_id": "sentence-transformers__all-mpnet-base-v2",
        "model_text": "I don't know how to use ingest pipelines"
      }
    },
    "k": 26,
    "num_candidates": 2600
  }
}
// Weighted hybrid search: a knn section and a multi_match query in one
// request, each carrying its own boost (0.9 for knn, 0.2 for multi_match).
// score = 0.9 * knn_score + 0.2 * match_score
GET bytes-discuss-dense-search/_search
{
  "_source": false,
  "fields": ["title"],
  "size": 6,
  "query": {
    "multi_match": {
      "query": "I don't know how to use ingest pipelines",
      "fields": ["title"],
      "boost": 0.2
    }
  },
  "knn": {
    "field": "title_vector.predicted_value",
    "query_vector_builder": {
      "text_embedding": {
        "model_id": "sentence-transformers__all-mpnet-base-v2",
        "model_text": "I don't know how to use ingest pipelines"
      }
    },
    "k": 26,
    "num_candidates": 2600,
    "boost": 0.9
  }
}
/*
Reciprocal Rank Fusion (RRF)
https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf
1. Generate each ranking
2. Calculate the reciprocal rank of each item: 1st ranked item 1/(k+1), 2nd 1/(k+2), ...
   k = ranking constant ("rank_constant" in the rrf settings, default 60); unrelated to the "k" of a knn search
3. Sum reciprocal ranks for each document
4. Sort documents by calculated sums
Example (k=1); each term is 1.0/(k + rank), and an empty column means the document is missing from that ranking
# doc  | query     | knn       | score
_id: 1 = 1.0/(1+4) + 1.0/(1+3) = 0.4500
_id: 2 = 1.0/(1+3) + 1.0/(1+2) = 0.5833
_id: 3 = 1.0/(1+2) + 1.0/(1+1) = 0.8333
_id: 4 = 1.0/(1+1)             = 0.5000
_id: 5 =             1.0/(1+4) = 0.2000
*/
// Hybrid search ranked with Reciprocal Rank Fusion: the knn section and the
// multi_match query each produce their own ranking, and the "rank.rrf"
// section fuses them by rank.
// window_size (60) is the size of each individual result set fed into the
// fusion; the knn "k" is set to the same value, and the fused list is cut
// down to "size" (6).
GET bytes-discuss-dense-search/_search
{
  "_source": false,
  "fields": ["title"],
  "size": 6,
  "rank": {
    "rrf": {
      "window_size": 60
    }
  },
  "query": {
    "multi_match": {
      "query": "I don't know how to use ingest pipelines",
      "fields": ["title"]
    }
  },
  "knn": {
    "field": "title_vector.predicted_value",
    "query_vector_builder": {
      "text_embedding": {
        "model_id": "sentence-transformers__all-mpnet-base-v2",
        "model_text": "I don't know how to use ingest pipelines"
      }
    },
    "k": 60,
    "num_candidates": 2600
  }
}
// Reciprocal Rank Fusion over two queries on the sparse index, listed in
// "sub_searches" in this order:
//   1. text_expansion on "ml.tokens" with the ".elser_model_1" model
//   2. multi_match on "title"
// The rrf object is empty, so its settings are left at their defaults.
GET bytes-discuss-sparse-search/_search
{
  "_source": false,
  "fields": ["title"],
  "size": 6,
  "rank": {
    "rrf": {}
  },
  "sub_searches": [
    {
      "query": {
        "text_expansion": {
          "ml.tokens": {
            "model_id": ".elser_model_1",
            "model_text": "I don't know how to use ingest pipelines"
          }
        }
      }
    },
    {
      "query": {
        "multi_match": {
          "query": "I don't know how to use ingest pipelines",
          "fields": ["title"]
        }
      }
    }
  ]
}
// (Un)supported features: https://www.elastic.co/guide/en/elasticsearch/reference/8.10/rrf.html#rrf-supported-features
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment