Created
August 11, 2022 19:10
-
-
Save Spartee/1bf4502052cbee08eb05d8519d6b0f76 to your computer and use it in GitHub Desktop.
Semantic Similarity in Python with Hugging Face Sentence Transformers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
from numpy.linalg import norm
from sentence_transformers import SentenceTransformer
# Define the model we want to use (it'll download itself on first use).
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Small example corpus that the query is compared against.
sentences = [
    "That is a very happy person",
    "That is a happy dog",
    "Today is a sunny day",
]

# Vector embeddings created from the dataset; they are later paired
# one-to-one with `sentences`, so the order must be kept.
embeddings = model.encode(sentences)

# Query vector embedding, produced by the same model so that it can be
# compared against the corpus embeddings.
query_embedding = model.encode("That is a happy person")
# define our similarity measure (cosine similarity: higher means more alike)
def cosine_similarity(a, b):
    """Return the cosine similarity between vectors ``a`` and ``b``.

    The value is ``dot(a, b) / (norm(a) * norm(b))``: 1 for vectors that
    point the same way, 0 for orthogonal vectors and -1 for opposite ones.

    Args:
        a: First vector (array-like accepted by ``np.dot`` and ``norm``).
        b: Second vector, same length as ``a``.

    Returns:
        The cosine similarity as a scalar. If either vector has zero
        magnitude the ratio is undefined, so ``0.0`` is returned instead
        of dividing by zero and producing NaN.
    """
    denominator = norm(a) * norm(b)
    # A zero-length vector has no direction; avoid the 0/0 division.
    if denominator == 0:
        return 0.0
    return np.dot(a, b) / denominator
# Run the semantic similarity search: score every corpus sentence against
# the query embedding and print the result next to the sentence text.
print("Query: That is a happy person")
for embedding, sentence in zip(embeddings, sentences):
    print(sentence, " -> similarity score = ",
          cosine_similarity(embedding, query_embedding))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment