MathiasGruber/get_best_match.py

## get_best_match.py
import numpy as np
from sklearn.preprocessing import normalize

# Use the first question as the query
QUERY_ID = 0

# Normalize every embedding to unit L2 length, so that the dot product
# between two rows is their cosine similarity.
norm_data = normalize(sentence_embeddings, norm='l2')

# A best match only exists if there is at least one entry besides the query.
if norm_data.shape[0] < 2:
    raise ValueError("Need at least two embeddings to find a best match")

# Calculate scores as the dot product between every embedding and the query.
# The query row is 1-D, so no transpose is needed.
scores = np.dot(norm_data, norm_data[QUERY_ID])

# The best match is the highest-scoring entry other than the query itself.
# The query is excluded explicitly instead of being assumed to sort last:
# an exact duplicate of the query (or floating-point rounding) can tie with
# or outrank the query's self-score, in which case picking the second-highest
# entry could return QUERY_ID itself. The mask is applied to a copy so that
# `scores` still holds the unmodified similarities.
masked_scores = scores.astype(float, copy=True)
masked_scores[QUERY_ID] = -np.inf
MATCH_ID = np.argmax(masked_scores)
	import numpy as np
	from sklearn.preprocessing import normalize

	# Use the first question as the query
	QUERY_ID = 0

	# Normalize every embedding to unit L2 length, so that the dot product
	# between two rows is their cosine similarity.
	norm_data = normalize(sentence_embeddings, norm='l2')

	# A best match only exists if there is at least one entry besides the query.
	if norm_data.shape[0] < 2:
		raise ValueError("Need at least two embeddings to find a best match")

	# Calculate scores as the dot product between every embedding and the query.
	# The query row is 1-D, so no transpose is needed.
	scores = np.dot(norm_data, norm_data[QUERY_ID])

	# The best match is the highest-scoring entry other than the query itself.
	# The query is excluded explicitly instead of being assumed to sort last:
	# an exact duplicate of the query (or floating-point rounding) can tie with
	# or outrank the query's self-score, in which case picking the second-highest
	# entry could return QUERY_ID itself. The mask is applied to a copy so that
	# `scores` still holds the unmodified similarities.
	masked_scores = scores.astype(float, copy=True)
	masked_scores[QUERY_ID] = -np.inf
	MATCH_ID = np.argmax(masked_scores)