Created
February 7, 2018 14:53
-
-
Save chefnobody/49e4be408cc1f5a1b23e670ad2cf84d6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
extension String { | |
// Subscript with an integer that returns the character | |
subscript(index: Int) -> Character { | |
return self[String.Index(encodedOffset: index)] | |
} | |
/* | |
Levenshtein Distance | |
https://en.wikipedia.org/wiki/Levenshtein_distance | |
Informally, the Levenshtein distance between two words is the minimum number of single-character edits (insertions, deletions or substitutions) required to change one word into the other. | |
Note: This algorithm plays nicely with single Unicode code blocks. Unicode with multiple code blocks will produce incorrect results. | |
*/ | |
func levenshteinDistance(from: String) -> Int { | |
// If source is empty, the distance must be the count of the target string | |
guard self.count > 0 else { return from.count } | |
// Conversely if the target is empty, the distance must be the length of the source | |
guard from.count > 0 else { return self.count } | |
// This is hard to read, but its an initialized matrix w/ all 0's | |
// of m + 1 and n + 1 length, respectively. | |
// Columns count equates to target string length | |
// Row count equates to source string length. | |
let m = self.count + 1 | |
let n = from.count + 1 | |
let columns: [Int] = Array<Int>(repeating: 0, count: n) | |
var matrix = Array<[Int]>(repeating: columns, count: m) | |
// prefixing 0'th row w/ 1..<m | |
for i in 1..<m { | |
matrix[i][0] = i | |
} | |
// prefixings 0'th column w/ 1..<n | |
for j in 1..<n { | |
matrix[0][j] = j | |
} | |
var cost = 0 | |
// Work out along columns j through each row i | |
for j in 1..<n { | |
for i in 1..<m { | |
// calculate cost | |
// Note: Use 0-based indexing to pull values from string | |
cost = self[i-1] == from[j-1] ? 0 : 1 | |
let deletions = matrix[i-1][j] + 1 | |
let insertions = matrix[i][j-1] + 1 | |
let substitutions = matrix[i-1][j-1] + cost | |
matrix[i][j] = Swift.min(deletions, insertions, substitutions) | |
} | |
} | |
return matrix[m-1][n-1] | |
} | |
} | |
// Edge case tests: | |
"".levenshteinDistance(from: "") | |
"".levenshteinDistance(from: "BEERS") | |
"PIZZA".levenshteinDistance(from: "") | |
// Some basic tests: | |
"Saturday".levenshteinDistance(from: "Sunday") | |
"kitten".levenshteinDistance(from: "sitting") | |
"PIZZA".levenshteinDistance(from: "PIZZA") | |
"PIZZA".levenshteinDistance(from: "PIZZ") | |
"PIZZA".levenshteinDistance(from: "PIZZAZ") | |
"PIZZA".levenshteinDistance(from:"BEERS") | |
"PIZZA".levenshteinDistance(from: "PEERS") | |
"PIZZA".levenshteinDistance(from: "pizza") | |
"four score and seven years ago".levenshteinDistance(from: "4 score and 7 years ago our fore fathers") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment