Skip to content

Instantly share code, notes, and snippets.

@nicklockwood
Created December 21, 2020 18:52
Show Gist options
  • Save nicklockwood/27f45d8e8710bc53ad1fbbdfbeb7c68f to your computer and use it in GitHub Desktop.
Save nicklockwood/27f45d8e8710bc53ad1fbbdfbeb7c68f to your computer and use it in GitHub Desktop.
Swift code to calculate the Jaro-Winkler edit distance between two strings
/// The Jaro-Winkler edit distance between two strings (0 - 1)
func editDistance(_ lhs: String, _ rhs: String) -> Double {
return 1 - jaroWinklerSimilarity(Array(lhs), Array(rhs))
}
/// Jaro-Winkler similarity between two strings (0 - 1)
/// https://www.geeksforgeeks.org/jaro-and-jaro-winkler-similarity/
private func jaroWinklerSimilarity(_ s1: [Character], _ s2: [Character]) -> Double {
var jaro = jaroSimilarity(s1, s2)
// If the jaro Similarity is above a threshold
if jaro > 0.7 {
// Find the length of common prefix
var prefix = 0.0
for i in 0 ..< min(s1.count, s2.count) {
// If the characters match
if s1[i] == s2[i] {
prefix += 1
} else {
break
}
}
// Maximum of 4 characters are allowed in prefix
prefix = min(4, prefix)
// Calculate jaro winkler Similarity
jaro += 0.1 * prefix * (1 - jaro)
}
return jaro
}
/// The Jaro similarity between two strings (0 - 1)
/// https://www.geeksforgeeks.org/jaro-and-jaro-winkler-similarity/
private func jaroSimilarity(_ s1: [Character], _ s2: [Character]) -> Double {
// If the Strings are equal
if s1 == s2 {
return 1
}
// Lengths
let len1 = s1.count
let len2 = s2.count
// Maximum distance up to which matching is allowed
let maxDist = Int(Double(max(len1, len2) / 2) - 1)
// Number of matches
var matches = 0.0
// Hash for matches
var hash1 = [Int](repeating: 0, count: len1)
var hash2 = [Int](repeating: 0, count: len2)
// Traverse through the first String
for i in 0 ..< len1 {
// Check for matches
let start = max(0, i - maxDist)
for j in start ..< max(start, min(len2, i + maxDist + 1)) {
// If there is a match
if s1[i] == s2[j], hash2[j] == 0 {
hash1[i] = 1
hash2[j] = 1
matches += 1
break
}
}
}
// If there is no match
if matches == 0 {
return 0
}
// Number of transpositions
var t = 0.0
// Count number of occurances where two characters match but
// there is a third matched character in between the indices
var j = 0
for i in 0 ..< len1 where hash1[i] == 1 {
// Find the next matched character
// in second String
while hash2[j] == 0 {
j += 1
}
if s1[i] != s2[j] {
j += 1
t += 1
}
}
// Return the Jaro Similarity
return (matches / Double(len1) + matches / Double(len2) + (matches - t / 2) / matches) / 3
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment