-
-
Save Hotshot824/888d00c7066b0016c95fc3a1bd441ea9 to your computer and use it in GitHub Desktop.
Needleman-Wunsch and Hischberg algorithm implementation.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
A simple implementation of the Hirschberg algorithm to calculate the similarity between two strings, | |
Time complexity O(nm), Space complexity O(min(n, m)). | |
Output the matching strin like this: | |
Aligned X: ABCDEF--GH | |
Aligned Y: --CDEFABGH | |
Edit distance: 4 | |
*/ | |
import ( | |
"fmt" | |
) | |
var ( | |
gap = 1 | |
mismatch = 1 | |
match = 0 | |
) | |
func NeedlemanWunsch(A, B string) (string, string, int) { | |
n, m := len(A), len(B) | |
dp := make([][]int, n+1) | |
for i := range dp { | |
dp[i] = make([]int, m+1) | |
} | |
for i := 1; i <= n; i++ { | |
dp[i][0] = i | |
} | |
for j := 1; j <= m; j++ { | |
dp[0][j] = j | |
} | |
for i := 1; i <= n; i++ { | |
for j := 1; j <= m; j++ { | |
if A[i-1] == B[j-1] { | |
dp[i][j] = dp[i-1][j-1] + match | |
} else { | |
dp[i][j] = min(dp[i-1][j]+mismatch, min(dp[i][j-1]+gap, dp[i-1][j-1]+gap)) | |
} | |
} | |
} | |
return traceback(A, B, dp) | |
} | |
func traceback(A, B string, dp [][]int) (string, string, int) { | |
i, j := len(A), len(B) | |
Z, W := "", "" | |
editDistance := 0 | |
for i > 0 || j > 0 { | |
if i > 0 && j > 0 && A[i-1] == B[j-1] { | |
Z = string(A[i-1]) + Z | |
W = string(B[j-1]) + W | |
i-- | |
j-- | |
} else if j > 0 && (i == 0 || dp[i][j-1] < dp[i-1][j]) { | |
Z = "-" + Z | |
W = string(B[j-1]) + W | |
j-- | |
editDistance++ | |
} else { | |
Z = string(A[i-1]) + Z | |
W = "-" + W | |
i-- | |
editDistance++ | |
} | |
} | |
return Z, W, editDistance | |
} | |
func lcsLength(A, B string) []int { | |
n, m := len(A), len(B) | |
prev := make([]int, m+1) | |
for i := 1; i <= n; i++ { | |
current := make([]int, m+1) | |
for j := 1; j <= m; j++ { | |
if A[i-1] == B[j-1] { | |
current[j] = prev[j-1] + 1 | |
} else { | |
current[j] = max(prev[j], current[j-1]) | |
} | |
} | |
prev = current | |
} | |
return prev | |
} | |
func hirschbergRecursive(A, B string) (string, string, int) { | |
if len(A) == 0 { | |
return repeat("-", len(B)), B, len(B) | |
} else if len(B) == 0 { | |
return A, repeat("-", len(A)), len(A) | |
} else if len(A) == 1 || len(B) == 1 { | |
return NeedlemanWunsch(A, B) | |
} else { | |
mid := len(A) / 2 | |
L1 := lcsLength(A[:mid], B) | |
L2 := lcsLength(reverse(A[mid:]), reverse(B)) | |
reverseSlice(L2) | |
partition := maxIndex(L1, L2) | |
Z1, W1, dist1 := hirschbergRecursive(A[:mid], B[:partition]) | |
Z2, W2, dist2 := hirschbergRecursive(A[mid:], B[partition:]) | |
return Z1 + Z2, W1 + W2, dist1 + dist2 | |
} | |
} | |
func hirschberg(X, Y string) (string, string, int) { | |
Z, W, dist := hirschbergRecursive(X, Y) | |
return Z, W, dist | |
} | |
func min(a, b int) int { | |
if a < b { | |
return a | |
} | |
return b | |
} | |
func max(a, b int) int { | |
if a > b { | |
return a | |
} | |
return b | |
} | |
func repeat(s string, count int) string { | |
result := "" | |
for i := 0; i < count; i++ { | |
result += s | |
} | |
return result | |
} | |
func reverse(s string) string { | |
r := []rune(s) | |
for i, j := 0, len(r)-1; i < j; i, j = i+1, j-1 { | |
r[i], r[j] = r[j], r[i] | |
} | |
return string(r) | |
} | |
func reverseSlice(slice []int) { | |
for i, j := 0, len(slice)-1; i < j; i, j = i+1, j-1 { | |
slice[i], slice[j] = slice[j], slice[i] | |
} | |
} | |
func maxIndex(L1, L2 []int) int { | |
maxIndex := 0 | |
maxValue := L1[0] + L2[0] | |
for i := 1; i < len(L1); i++ { | |
if L1[i]+L2[i] > maxValue { | |
maxValue = L1[i] + L2[i] | |
maxIndex = i | |
} | |
} | |
return maxIndex | |
} | |
func main() { | |
X := "ABCDEFGH" | |
Y := "CDEFABGH" | |
Z, W, dist := hirschberg(X, Y) | |
fmt.Println("Aligned X:", Z) | |
fmt.Println("Aligned Y:", W) | |
fmt.Println("Edit distance:", dist) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
Using Needleman-Wunsch algorithm to calculate the similarity between two strings, | |
Time complexity O(nm), Space complexity O(nm). | |
Output the matching strin like this: | |
Aligned X: ABCDEF--GH | |
Aligned Y: --CDEFABGH | |
Edit distance: 4 | |
*/ | |
import ( | |
"fmt" | |
) | |
var ( | |
gap = 1 | |
mismatch = 1 | |
match = 0 | |
) | |
func NeedlemanWunsch(A, B string) (string, string, int) { | |
n, m := len(A), len(B) | |
dp := make([][]int, n+1) | |
for i := range dp { | |
dp[i] = make([]int, m+1) | |
} | |
for i := 1; i <= n; i++ { | |
dp[i][0] = i | |
} | |
for j := 1; j <= m; j++ { | |
dp[0][j] = j | |
} | |
for i := 1; i <= n; i++ { | |
for j := 1; j <= m; j++ { | |
if A[i-1] == B[j-1] { | |
dp[i][j] = dp[i-1][j-1] + match | |
} else { | |
dp[i][j] = min(dp[i-1][j]+mismatch, min(dp[i][j-1]+gap, dp[i-1][j-1]+gap)) | |
} | |
} | |
} | |
return traceback(A, B, dp) | |
} | |
func traceback(A, B string, dp [][]int) (string, string, int) { | |
i, j := len(A), len(B) | |
Z, W := "", "" | |
editDistance := 0 | |
for i > 0 || j > 0 { | |
if i > 0 && j > 0 && A[i-1] == B[j-1] { | |
Z = string(A[i-1]) + Z | |
W = string(B[j-1]) + W | |
i-- | |
j-- | |
} else if j > 0 && (i == 0 || dp[i][j-1] < dp[i-1][j]) { | |
Z = "-" + Z | |
W = string(B[j-1]) + W | |
j-- | |
editDistance++ | |
} else { | |
Z = string(A[i-1]) + Z | |
W = "-" + W | |
i-- | |
editDistance++ | |
} | |
} | |
return Z, W, editDistance | |
} | |
func min(a, b int) int { | |
if a < b { | |
return a | |
} | |
return b | |
} | |
func max(a, b int) int { | |
if a > b { | |
return a | |
} | |
return b | |
} | |
func main() { | |
X := "ABCDEFGH" | |
Y := "CDEFABGH" | |
Z, W, dist := NeedlemanWunsch(X, Y) | |
fmt.Println("Aligned X:", Z) | |
fmt.Println("Aligned Y:", W) | |
fmt.Println("Edit distance:", dist) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment