Created
May 14, 2020 14:12
-
-
Save narendraprasath/3d7c0b78210dca613241a3988ccb3eba to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Retrieves the most similar training question for each test query, based on the input vectors/embeddings
def retrieveSimilarFAQ(train_question_vectors, test_question_vectors, train_QA_df, train_column_name, test_QA_df, test_column_name):
    """Print and return, for every test query, its most similar training question.

    Each test vector is scored against every training vector with
    ``cosine_similarity``; the training position with the highest score wins
    (the earliest one on ties). The query and the retrieved question are
    printed for each test vector.

    Args:
        train_question_vectors: Iterable of training question embeddings.
        test_question_vectors: Iterable of test query embeddings.
        train_QA_df: Container indexed by ``train_column_name`` whose result
            supports positional ``.iloc`` lookup (presumably a pandas
            DataFrame -- confirm against the caller).
        train_column_name: Column of ``train_QA_df`` holding the questions.
        test_QA_df: Same as ``train_QA_df`` but for the test queries.
        test_column_name: Column of ``test_QA_df`` holding the queries.

    Returns:
        A list with one entry per test vector: the position of the best
        matching training vector, or ``None`` when no training vector
        produced a comparable score (for example when there are none).
    """
    similar_question_index = []
    for test_index, test_vector in enumerate(test_question_vectors):
        # Start below every possible score so that a similarity of exactly
        # -1 is still selected instead of leaving a placeholder index behind.
        best_score, best_index = float("-inf"), None
        for train_index, train_vector in enumerate(train_question_vectors):
            # NOTE(review): presumably sklearn's pairwise cosine_similarity,
            # which returns a matrix -- hence the [0][0]; confirm.
            sim_score = cosine_similarity(train_vector, test_vector)[0][0]
            if best_score < sim_score:
                best_score, best_index = sim_score, train_index

        # With no match there is nothing to look up; indexing with a
        # placeholder such as -1 would silently report the last training row.
        if best_index is None:
            retrieved_question = None
        else:
            retrieved_question = train_QA_df[train_column_name].iloc[best_index]
        similar_question_index.append(best_index)

        print("######")
        print(f"Query Question: \t {test_QA_df[test_column_name].iloc[test_index]}")
        print(f"Retrieved Question: \t {retrieved_question}")
        print("######")

    return similar_question_index
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment