Skip to content

Instantly share code, notes, and snippets.

@Hotshot824
Last active June 3, 2024 17:55
Show Gist options
  • Save Hotshot824/888d00c7066b0016c95fc3a1bd441ea9 to your computer and use it in GitHub Desktop.
Save Hotshot824/888d00c7066b0016c95fc3a1bd441ea9 to your computer and use it in GitHub Desktop.
Needleman-Wunsch and Hischberg algorithm implementation.
/*
A simple implementation of the Hirschberg algorithm to calculate the similarity between two strings,
Time complexity O(nm), Space complexity O(min(n, m)).
Output the matching strin like this:
Aligned X: ABCDEF--GH
Aligned Y: --CDEFABGH
Edit distance: 4
*/
import (
"fmt"
)
var (
gap = 1
mismatch = 1
match = 0
)
func NeedlemanWunsch(A, B string) (string, string, int) {
n, m := len(A), len(B)
dp := make([][]int, n+1)
for i := range dp {
dp[i] = make([]int, m+1)
}
for i := 1; i <= n; i++ {
dp[i][0] = i
}
for j := 1; j <= m; j++ {
dp[0][j] = j
}
for i := 1; i <= n; i++ {
for j := 1; j <= m; j++ {
if A[i-1] == B[j-1] {
dp[i][j] = dp[i-1][j-1] + match
} else {
dp[i][j] = min(dp[i-1][j]+mismatch, min(dp[i][j-1]+gap, dp[i-1][j-1]+gap))
}
}
}
return traceback(A, B, dp)
}
func traceback(A, B string, dp [][]int) (string, string, int) {
i, j := len(A), len(B)
Z, W := "", ""
editDistance := 0
for i > 0 || j > 0 {
if i > 0 && j > 0 && A[i-1] == B[j-1] {
Z = string(A[i-1]) + Z
W = string(B[j-1]) + W
i--
j--
} else if j > 0 && (i == 0 || dp[i][j-1] < dp[i-1][j]) {
Z = "-" + Z
W = string(B[j-1]) + W
j--
editDistance++
} else {
Z = string(A[i-1]) + Z
W = "-" + W
i--
editDistance++
}
}
return Z, W, editDistance
}
func lcsLength(A, B string) []int {
n, m := len(A), len(B)
prev := make([]int, m+1)
for i := 1; i <= n; i++ {
current := make([]int, m+1)
for j := 1; j <= m; j++ {
if A[i-1] == B[j-1] {
current[j] = prev[j-1] + 1
} else {
current[j] = max(prev[j], current[j-1])
}
}
prev = current
}
return prev
}
func hirschbergRecursive(A, B string) (string, string, int) {
if len(A) == 0 {
return repeat("-", len(B)), B, len(B)
} else if len(B) == 0 {
return A, repeat("-", len(A)), len(A)
} else if len(A) == 1 || len(B) == 1 {
return NeedlemanWunsch(A, B)
} else {
mid := len(A) / 2
L1 := lcsLength(A[:mid], B)
L2 := lcsLength(reverse(A[mid:]), reverse(B))
reverseSlice(L2)
partition := maxIndex(L1, L2)
Z1, W1, dist1 := hirschbergRecursive(A[:mid], B[:partition])
Z2, W2, dist2 := hirschbergRecursive(A[mid:], B[partition:])
return Z1 + Z2, W1 + W2, dist1 + dist2
}
}
func hirschberg(X, Y string) (string, string, int) {
Z, W, dist := hirschbergRecursive(X, Y)
return Z, W, dist
}
func min(a, b int) int {
if a < b {
return a
}
return b
}
func max(a, b int) int {
if a > b {
return a
}
return b
}
func repeat(s string, count int) string {
result := ""
for i := 0; i < count; i++ {
result += s
}
return result
}
func reverse(s string) string {
r := []rune(s)
for i, j := 0, len(r)-1; i < j; i, j = i+1, j-1 {
r[i], r[j] = r[j], r[i]
}
return string(r)
}
func reverseSlice(slice []int) {
for i, j := 0, len(slice)-1; i < j; i, j = i+1, j-1 {
slice[i], slice[j] = slice[j], slice[i]
}
}
func maxIndex(L1, L2 []int) int {
maxIndex := 0
maxValue := L1[0] + L2[0]
for i := 1; i < len(L1); i++ {
if L1[i]+L2[i] > maxValue {
maxValue = L1[i] + L2[i]
maxIndex = i
}
}
return maxIndex
}
func main() {
X := "ABCDEFGH"
Y := "CDEFABGH"
Z, W, dist := hirschberg(X, Y)
fmt.Println("Aligned X:", Z)
fmt.Println("Aligned Y:", W)
fmt.Println("Edit distance:", dist)
}
/*
Using Needleman-Wunsch algorithm to calculate the similarity between two strings,
Time complexity O(nm), Space complexity O(nm).
Output the matching strin like this:
Aligned X: ABCDEF--GH
Aligned Y: --CDEFABGH
Edit distance: 4
*/
import (
"fmt"
)
var (
gap = 1
mismatch = 1
match = 0
)
func NeedlemanWunsch(A, B string) (string, string, int) {
n, m := len(A), len(B)
dp := make([][]int, n+1)
for i := range dp {
dp[i] = make([]int, m+1)
}
for i := 1; i <= n; i++ {
dp[i][0] = i
}
for j := 1; j <= m; j++ {
dp[0][j] = j
}
for i := 1; i <= n; i++ {
for j := 1; j <= m; j++ {
if A[i-1] == B[j-1] {
dp[i][j] = dp[i-1][j-1] + match
} else {
dp[i][j] = min(dp[i-1][j]+mismatch, min(dp[i][j-1]+gap, dp[i-1][j-1]+gap))
}
}
}
return traceback(A, B, dp)
}
func traceback(A, B string, dp [][]int) (string, string, int) {
i, j := len(A), len(B)
Z, W := "", ""
editDistance := 0
for i > 0 || j > 0 {
if i > 0 && j > 0 && A[i-1] == B[j-1] {
Z = string(A[i-1]) + Z
W = string(B[j-1]) + W
i--
j--
} else if j > 0 && (i == 0 || dp[i][j-1] < dp[i-1][j]) {
Z = "-" + Z
W = string(B[j-1]) + W
j--
editDistance++
} else {
Z = string(A[i-1]) + Z
W = "-" + W
i--
editDistance++
}
}
return Z, W, editDistance
}
func min(a, b int) int {
if a < b {
return a
}
return b
}
func max(a, b int) int {
if a > b {
return a
}
return b
}
func main() {
X := "ABCDEFGH"
Y := "CDEFABGH"
Z, W, dist := NeedlemanWunsch(X, Y)
fmt.Println("Aligned X:", Z)
fmt.Println("Aligned Y:", W)
fmt.Println("Edit distance:", dist)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment