Skip to content

Instantly share code, notes, and snippets.

@bsod90
Last active August 4, 2019 18:44
Show Gist options
  • Save bsod90/8c272ee9c8f14cf99b420cabcc7ef53a to your computer and use it in GitHub Desktop.
Save bsod90/8c272ee9c8f14cf99b420cabcc7ef53a to your computer and use it in GitHub Desktop.
# Connect to the semantic-search service and run the query.
con = get_victor_connection()
response = con.FindTopKProblems(
    victor_pb2.TopKProblemsRequest(
        query=search_query,
        # In our implementation -1 means "return all matches".
        k=-1,
    ),
)

# Because we're using cosine similarity to find the closest vectors,
# the resulting distance will always be in the range from -1 to 1.
# This allows us to easily define a confidence threshold and
# consider anything above this threshold to be a
# "High Confidence Result".
CONFIDENCE_THRESHOLD = 0.65

# Custom scoring functions we're going to pass to ElasticSearch: one
# weighted filter per semantic match, keyed by the matched document id.
# More information here: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html
scoring_functions = [
    {
        "filter": {"term": {"document_id": match.id}},
        # This function re-shapes the ElasticSearch results so that
        # documents that scored well in the similarity search move higher.
        # The weight is document_count raised to the match's similarity.
        "weight": math.pow(document_count, match.distance),
    }
    for match in response.matches
]

# Ids of the matches whose similarity is strictly above the threshold.
high_confidence_docs = [
    match.id
    for match in response.matches
    if match.distance > CONFIDENCE_THRESHOLD
]

elastic_query = {
    "bool": {
        "should": [
            {
                "match": {
                    "title": search_query,
                },
            },
            #
            # You can include any extra fields that you want to
            # query for your documents here
            # ...
            #
            #
            # The "terms" part of the query ensures that anything that
            # has been found by Victor with high confidence will always
            # be returned by ElasticSearch.
            {
                "terms": {
                    "document_id": high_confidence_docs,
                    # You can add extra boost to high confidence results.
                    "boost": 10,
                },
            },
        ],
    },
}

# Wrap the text/terms query in a function_score query so the per-document
# weights computed above are applied to the matching documents.
params = {
    "query": {
        "function_score": {
            "query": elastic_query,
            "functions": scoring_functions,
        },
    },
}

# We use https://github.com/elastic/elasticsearch-dsl-py
# It's a Python wrapper around the native ES DSL.
from elasticsearch_dsl import Search
from our_project.search import Document

search = Search.from_dict(params)
search = search.index(Document.get_index_name())
# Keep the result of the search so the re-ranked hits can actually be used.
search_results = search.execute()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment