Skip to content

Instantly share code, notes, and snippets.

Last active August 4, 2019 18:44
Show Gist options
  • Save bsod90/8c272ee9c8f14cf99b420cabcc7ef53a to your computer and use it in GitHub Desktop.
Save bsod90/8c272ee9c8f14cf99b420cabcc7ef53a to your computer and use it in GitHub Desktop.
# Connect to the semantic-search service and run the query
con = get_victor_connection()
response = con.FindTopKProblems(
# In our implementation -1 means "return all matches"
# Because we're using Cosine Similarity to find the closest vectors,
# the resulting distance will always be in the range from -1 to 1.
# This allows us to easily define a confidence threshold and
# consider anything above this threshold to be a
# "High Confidence Result"
# A custom scoring function we're going to pass to ElasticSearch
# More information here:
scroring_functions = [
"filter": {"term": {"document_id":}},
# This function re-weights the ElasticSearch results so that documents
# that scored well in the similarity search move higher
"weight": math.pow(
document_count, match.distance
} for match in response.matches
high_confidence_docs = [ for match in response.matches
if match.distance > CONFIDENCE_THRESHOLD
elastic_query = {
'bool': {
'should': [{
"match": {
"title": search_query,
# You can include any extra fields that you want to
# query for your documents here
# ...
# The "terms" part of the query would ensure that anything that
# has been found by Victor will always be returned by ElasticSearch
"terms": {
"document_id": high_confidence_docs,
# You can add extra boost to high confidence results
"boost": 10,
params = {
'query': {
"function_score": {
"query": elastic_query
"functions": scoring_functions
# We use elasticsearch_dsl.
# It's a Python wrapper around the native ES DSL
from elasticsearch_dsl import Search
from import Document
search = Search.from_dict(params)
search = search.index(Document.get_index_name())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment