Created
June 27, 2014 05:54
-
-
Save bgreenlee/52d93a1d8fa1b8c1f38b to your computer and use it in GitHub Desktop.
Levenshtein Distance in Swift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Levenshtein edit distance calculator | |
* Usage: levenstein <string> <string> | |
* | |
* To compile: | |
* sudo xcode-select -switch /Applications/Xcode6-Beta.app/Contents/Developer | |
* xcrun swift -sdk $(xcrun --show-sdk-path --sdk macosx) levenshtein.swift | |
*/ | |
import Foundation | |
// return minimum value in a list of Ints | |
func min(numbers: Int...) -> Int { | |
return numbers.reduce(numbers[0], {$0 < $1 ? $0 : $1}) | |
} | |
func levenshtein(aStr: String, bStr: String) -> Int { | |
// create character arrays | |
let a = Array(aStr) | |
let b = Array(bStr) | |
// initialize matrix of size |a|+1 * |b|+1 to zero | |
var dist = Int[][]() | |
for row in 0...a.count { | |
dist.append(Int[](count: b.count + 1, repeatedValue: 0)) | |
} | |
// 'a' prefixes can be transformed into empty string by deleting every char | |
for i in 1...a.count { | |
dist[i][0] = i | |
} | |
// 'b' prefixes can be created from empty string by inserting every char | |
for j in 1...b.count { | |
dist[0][j] = j | |
} | |
for i in 1...a.count { | |
for j in 1...b.count { | |
if a[i-1] == b[j-1] { | |
dist[i][j] = dist[i-1][j-1] // noop | |
} else { | |
dist[i][j] = min( | |
dist[i-1][j] + 1, // deletion | |
dist[i][j-1] + 1, // insertion | |
dist[i-1][j-1] + 1 // substitution | |
) | |
} | |
} | |
} | |
return dist[a.count][b.count] | |
} | |
if Process.arguments.count != 3 { | |
println("Usage: levenstein <string> <string>") | |
exit(0) | |
} | |
println(levenshtein(Process.arguments[1], Process.arguments[2])) |
You'll find an improved version here : https://gist.github.com/kyro38/50102a47937e9896e4f4
Here is my (more generalized) version: https://gist.github.com/shergin/a6dba6ee5ae3b9ecb16c
I went and improved on the prior versions to incorporate all of the optimizations available in Swift 2.2. You can find the gist here: https://gist.github.com/TheDarkCode/341ec5b84c078a0be1887c2c58f6d929
However, I've also turned it into a cocoapod: https://www.github.com/TheDarkCode/SwiftyLevenshtein
Statements like this for i in 1...a.count
will lead to crash if a.count
is 0.
Always use something like 1..<count
to avoid any possibility of crashing.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Dramatically slower then this implementation https://gist.github.com/boratlibre/1593632
I've send couple of hours to make it faster but... It seems like Swift arrays and Strings manipulation are not as fast as objC.
On 2000 random Strings calculations Swift implementation is about 100(!!!) times slower then ObjC.