-
-
Save ademar111190/34d3de41308389a0d0d8 to your computer and use it in GitHub Desktop.
import kotlin.math.min | |
fun levenshtein(lhs : CharSequence, rhs : CharSequence) : Int { | |
if(lhs == rhs) { return 0 } | |
if(lhs.isEmpty()) { return rhs.length } | |
if(rhs.isEmpty()) { return lhs.length } | |
val lhsLength = lhs.length + 1 | |
val rhsLength = rhs.length + 1 | |
var cost = Array(lhsLength) { it } | |
var newCost = Array(lhsLength) { 0 } | |
for (i in 1..rhsLength-1) { | |
newCost[0] = i | |
for (j in 1..lhsLength-1) { | |
val match = if(lhs[j - 1] == rhs[i - 1]) 0 else 1 | |
val costReplace = cost[j - 1] + match | |
val costInsert = cost[j] + 1 | |
val costDelete = newCost[j - 1] + 1 | |
newCost[j] = min(min(costInsert, costDelete), costReplace) | |
} | |
val swap = cost | |
cost = newCost | |
newCost = swap | |
} | |
return cost[lhsLength - 1] | |
} |
Fix charAt, length and typo:
fun levenshtein(lhs : CharSequence, rhs : CharSequence) : Int { val lhsLength = lhs.length val rhsLength = rhs.length var cost = Array(lhsLength) { it } var newCost = Array(lhsLength) { 0 } for (i in 1..rhsLength-1) { newCost[0] = i for (j in 1..lhsLength-1) { val match = if(lhs[j - 1] == rhs[i - 1]) 0 else 1 val costReplace = cost[j - 1] + match val costInsert = cost[j] + 1 val costDelete = newCost[j - 1] + 1 newCost[j] = Math.min(Math.min(costInsert, costDelete), costReplace) } val swap = cost cost = newCost newCost = swap } return cost[lhsLength - 1] }
Got here through a random google search and used the code. Not sure if you want comments, but still:
The code doesn't work. Try using the algorithm with lhs='bar' and rhs='das'. the result is 1 while it should be 2.
Hi @moudemand thanks for the feedback.
I guess my mistake mistake was:
val lhsLength = lhs.length
val rhsLength = rhs.length
instead of
val lhsLength = lhs.length + 1
val rhsLength = rhs.length + 1
So, using it together to the @glureau and @abreslav fix we have:
fun levenshtein(lhs : CharSequence, rhs : CharSequence) : Int {
val lhsLength = lhs.length + 1
val rhsLength = rhs.length + 1
var cost = Array(lhsLength) { it }
var newCost = Array(lhsLength) { 0 }
for (i in 1..rhsLength-1) {
newCost[0] = i
for (j in 1..lhsLength-1) {
val match = if(lhs[j - 1] == rhs[i - 1]) 0 else 1
val costReplace = cost[j - 1] + match
val costInsert = cost[j] + 1
val costDelete = newCost[j - 1] + 1
newCost[j] = Math.min(Math.min(costInsert, costDelete), costReplace)
}
val swap = cost
cost = newCost
newCost = swap
}
return cost[lhsLength - 1]
}
I'm updating the gist, feel free to point more improvements or fixes.
Please use newCost[j] = min(min(costInsert, costDelete), costReplace)
using import kotlin.math.min
instead, this will work on all targets, not JVM only.
I would also add
if(lhs == rhs) { return 0 }
if(lhs.isEmpty()) { return rhs.length }
if(rhs.isEmpty()) { return lhs.length }
Nice improvements @hfhbd I'm adding it now, thanks!
Nice code, been trying to use it in a homebrew quiz app. Noticed some oddities though, e.g. levenshtein("canada","canad") returns 6 instead of 1. Conversely, levenshtein("canad","canada") returns 0. Haven't wrapped my brain around it enough yet to figure out why...
@dankovision maybe some env issue? I have run here https://pl.kotl.in/b6_FH8CaJ and I got 1 for both cases
@dankovision maybe some env issue? I have run here https://pl.kotl.in/b6_FH8CaJ and I got 1 for both cases
Cheers for replying, finally got around to having a look. After much poking around, it was due to me accepting the Android Studio IDE suggestion of replacing the .. operator in the for loops with 'until', e.g.
Original code: for (i in 1..rhsLength - 1)
Suggested replacement: for (i in 1 until rhsLength)
Except I thought I knew better and added the '- 1' back on to the end of rhsLength, meaning the iterations were 1 short (until stops at and doesn't execute the last in the range, '..' stops after the end of the range). Ah well, learnt something today, don't think you know better than the compiler/IDE!
And I should add thanks for the code, works a charm now!
@dankovision hahaha nice, good to know :)
If anyone is interested in some quick and dirty tests so you don't lower your test coverage when you copy paste this sum-bich in:
class LevenshteinDistanceTest {
@Test
fun `Distance Test`() {
testDistance("", "", 0)
testDistance("1", "1", 0)
testDistance("1", "2", 1)
testDistance("12", "12", 0)
testDistance("123", "12", 1)
testDistance("1234", "1", 3)
testDistance("1234", "1233", 1)
testDistance("", "12345", 5)
testDistance("kitten", "mittens", 2)
testDistance("canada", "canad", 1)
testDistance("canad", "canada", 1)
}
private fun testDistance(a: String, b: String, expectedDistance: Int) {
val d = levenshteinDistance(a, b)
assertEquals(expectedDistance, d, "Distance did not match for `$a` and `$b`")
}
}
I used the code for Levenshtein distance method from above but also asked ChatGPT to write me one. Now I'm thinking which one should I pick 🤔 😅
fun levenshteinDistanceByGpt(s1: String, s2: String): Int {
val m = s1.length
val n = s2.length
// Create a 2D array to store the distances
val dp = Array(m + 1) { IntArray(n + 1) }
// Initialize the first row and column
for (i in 0..m) {
dp[i][0] = i
}
for (j in 0..n) {
dp[0][j] = j
}
// Fill in the rest of the array
for (i in 1..m) {
for (j in 1..n) {
val cost = if (s1[i - 1] == s2[j - 1]) 0 else 1
dp[i][j] = minOf(dp[i - 1][j] + 1, dp[i][j - 1] + 1, dp[i - 1][j - 1] + cost)
}
}
// Return the Levenshtein distance
return dp[m][n]
}
The human-written one is (more) readable.
And both pass the test from @Wavesonics (thanks for that one! 👍 ).
Use the GPT one so then you can include "AI" in your app name and win $$$
Seriously you can benchmark them if you want 😊 I presume the human one is better for RAM as it's only 2 arrays instead of a 2d arrays, but yours should be better for cpu I guess?
You can use
rhs[...]
instead ofrhs.charAt(...)