Skip to content

Instantly share code, notes, and snippets.

@fredriccliver
Created February 29, 2024 10:02
Show Gist options
  • Save fredriccliver/67641ff676263be8a53fd17b4475e216 to your computer and use it in GitHub Desktop.
Save fredriccliver/67641ff676263be8a53fd17b4475e216 to your computer and use it in GitHub Desktop.
Calculate utterance similarity using the dynamic time warping (DTW) algorithm.
from scipy.spatial.distance import euclidean
from fastdtw import fastdtw
import numpy as np
def calculate_similarity_percentage(distance: float, max_distance: float) -> float:
    """Convert a distance into a similarity percentage in the range [0, 100].

    The distance is normalised by ``max_distance``: a distance of 0 maps to
    100 and a distance equal to ``max_distance`` maps to 0.

    Args:
        distance: Measured distance between the two items (0 means identical).
        max_distance: Distance that should count as "completely different".

    Returns:
        The similarity as a percentage, clamped to ``[0, 100]``.
    """
    if max_distance <= 0:
        # Nothing to normalise against (e.g. both inputs were empty): avoid a
        # ZeroDivisionError and treat "no distance" as a perfect match.
        return 100.0 if distance <= 0 else 0.0
    similarity_percentage = (1 - distance / max_distance) * 100
    # A distance larger than max_distance would otherwise yield a negative
    # percentage; clamp so the result is always a valid percentage.
    return max(0.0, min(100.0, similarity_percentage))
def main():
    """Print a DTW-based similarity percentage for each hard-coded text pair.

    Each text is turned into a sequence of character code points, the two
    sequences are compared with ``fastdtw`` using the Euclidean distance, and
    the resulting distance is normalised into a percentage by
    ``calculate_similarity_percentage``.
    """
    # Per-character cost used to normalise the DTW distance: 122 - 32, i.e.
    # ord("z") - ord(" ").
    # NOTE(review): presumably meant as the widest gap between two expected
    # characters; characters outside that range can exceed it — confirm.
    max_char_distance = 122 - 32

    test_sets = [
        ("Not yet. Oh, I can see it. Sorry, I can see it.",
         "Not yet, Oh, I can see it. Sorry I can see."),
        ("test3",
         "test4"),
        ("1234567890",
         "0123456789"),
        ("100%, perfectly matched",
         "100%, perfectly matched"),
        ("100%, perfectly matched",
         "100%, perfectly matche."),
        ("non sementic parts is modified, so it's not p roblem.",
         "non-sementic parts is modified. so it's not a problem."),
        ("I have a lots of experience in NLP. But NLP means Non-Lexical-Problem.",
         "I have a lots of experience in NLP."),
        # Add more test sets as needed
    ]

    for text1, text2 in test_sets:
        # One row per character, one column holding its code point; the
        # (-1, 1) shape gives fastdtw a 1-D vector per step for `euclidean`.
        series1 = np.array([ord(char) for char in text1]).reshape(-1, 1)
        series2 = np.array([ord(char) for char in text2]).reshape(-1, 1)

        # fastdtw returns (distance, path); only the distance is needed here.
        distance, _ = fastdtw(series1, series2, dist=euclidean)

        # Normalise by the longer text so the result is length-independent.
        max_distance = max(len(text1), len(text2)) * max_char_distance
        similarity_percentage = calculate_similarity_percentage(distance, max_distance)

        print("-------------------------")
        print(f"Text 1: {text1}")
        print(f"Text 2: {text2}")
        print(f"Similarity Percentage: {similarity_percentage:.2f}%")
        print("-------------------------")
# NOTE: the usual `if __name__ == "__main__":` guard below is commented out,
# so main() runs unconditionally — on import as well as when the file is
# executed as a script.
# if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment