Skip to content

Instantly share code, notes, and snippets.

@hamletbatista
Last active December 6, 2019 17:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hamletbatista/af259cf42d3ca7635e31deae6bc8976c to your computer and use it in GitHub Desktop.
Save hamletbatista/af259cf42d3ca7635e31deae6bc8976c to your computer and use it in GitHub Desktop.
import heapq
# Maximum number of candidate matches ranked per query phrase.
TOP_N = 5
# When True, ranked matches whose score is not above THRESHOLD_PROBABILITY are dropped.
BEST_ONLY = False
# Exclusive lower bound applied when BEST_ONLY is enabled; it is compared against
# the raw inner-product score, not the formatted percentage string.
THRESHOLD_PROBABILITY = 0.65
def get_similarity_suggestion(phrase, no_percentage=False):
    """Return the best-matching phrases from ``df_404s`` for ``phrase``.

    The phrase is embedded with the TF-Hub module at ``module_url`` and
    scored against the precomputed ``message_embeddings`` using an inner
    product. The ``TOP_N`` highest-scoring rows are returned, best first;
    when ``BEST_ONLY`` is set, rows whose score does not exceed
    ``THRESHOLD_PROBABILITY`` are dropped.

    Relies on module-level names defined outside this snippet: ``tf``,
    ``hub``, ``np``, ``module_url``, ``message_embeddings`` and ``df_404s``.
    Assumes ``message_embeddings`` holds one embedding per row of
    ``df_404s``, in the same order -- TODO confirm against the code that
    builds it.

    Args:
        phrase: Text to find similar phrases for.
        no_percentage: If True, return only the matched ``phrase`` values.
            Otherwise return ``[phrase, score, index]`` triples.

    Returns:
        A list of matched phrases, or a list of
        ``[str(phrase), score formatted as a '%.2f' percentage string,
        positional index into df_404s]`` entries.
    """
    # NOTE(review): the graph and hub module are rebuilt and re-initialized
    # on every call; when scoring many phrases, consider embedding them in
    # one batch instead.
    graph = tf.Graph()
    with tf.compat.v1.Session(graph=graph) as session:
        embed = hub.Module(module_url)
        similarity_input_placeholder = tf.compat.v1.placeholder(tf.string, shape=(None))
        similarity_message_encodings = embed(similarity_input_placeholder)
        session.run(tf.compat.v1.global_variables_initializer())
        session.run(tf.compat.v1.tables_initializer())
        to_find_embeddings = session.run(
            similarity_message_encodings,
            feed_dict={similarity_input_placeholder: [phrase]},
        )

    # np.inner against a single-row query yields an (N, 1) array; flatten it
    # so each score is a true scalar (avoids relying on 1-element-array
    # truthiness and the deprecated float() conversion of ndim > 0 arrays).
    scores = np.ravel(np.inner(message_embeddings, to_find_embeddings))

    top_n_indexes = heapq.nlargest(TOP_N, range(len(scores)), key=scores.take)
    if BEST_ONLY:
        top_n_indexes = [
            index for index in top_n_indexes
            if scores[index] > THRESHOLD_PROBABILITY
        ]

    to_return = []
    for i in top_n_indexes:
        matched = df_404s.iloc[i]
        if no_percentage:
            to_return.append(matched['phrase'])
        else:
            to_return.append(
                [str(matched['phrase']), '%.2f' % float(scores[i] * 100), i]
            )
    return to_return
# Here we test one of the 404 phrases.
test_phrase = df_404s["phrase"].iloc[0] # -> ' shop by collection wonderland rainbow'
results = get_similarity_suggestion(test_phrase, no_percentage=False)
print(results)
# This is what the suggested matches look like: one
# [phrase, score as a percentage string, positional index into df_404s]
# entry per match.
#[[' shop by collection wonderland rainbow', '21.87', 0],
# [' catalog gold earrings gold cascade earrings p 200 ', '16.33', 1],
# [' shop by collection silver rain silver jewelry 1 ', '1.48', 2]]
# You can repeat the call below for every 404 URL to get the top matching suggestions for each one.
# Please try this as a homework exercise.
#
#results = get_similarity_suggestion(test_phrase, no_percentage=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment