from sklearn.metrics.pairwise import cosine_similarity

# NOTE(review): `np`, `pd`, `c_tf_idf`, `newsgroups` and `docs_per_class` are
# not defined in this snippet; presumably they come from earlier code -- confirm.

# Get similar classes: pairwise cosine similarity between the rows of c_tf_idf.
# Despite its name, `distances` holds similarity scores (higher = more alike).
distances = cosine_similarity(c_tf_idf, c_tf_idf)

# Zero the diagonal so a row's perfect match with itself does not win the
# argmax below.
np.fill_diagonal(distances, 0)

# For each class, extract the most similar class and pair the two names.
result = pd.DataFrame(
    [
        (
            newsgroups.target_names[index],
            newsgroups.target_names[distances[index].argmax()],
        )
        for index in range(len(docs_per_class))
    ],
    columns=["From", "To"],
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment