Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Needleman-Wunsch algorithm implementation in Haskell (tail recursive version)
import Data.Array
import Data.List
import Data.Ord
fromBlosum :: Char -> Int
fromBlosum a = case elemIndex a "ARNDCQEGHILKMFPSTWYVBJZX" of
Just i -> i
Nothing -> error "Invalid amino acid"
toBlosum :: Int -> Char
toBlosum a = "ARNDCQEGHILKMFPSTWYVBJZX" !! a
convert :: [Char] -> [Int]
convert [] = []
convert (x:xs) = fromBlosum x : convert xs
blosum80 = listArray (0, 575) [
5, -2, -2, -2, -1, -1, -1, 0, -2, -2, -2, -1, -1, -3, -1, 1, 0, -3, -2, 0, -2, -2, -1, -1,
-2, 6, -1, -2, -4, 1, -1, -3, 0, -3, -3, 2, -2, -4, -2, -1, -1, -4, -3, -3, -1, -3, 0, -1,
-2, -1, 6, 1, -3, 0, -1, -1, 0, -4, -4, 0, -3, -4, -3, 0, 0, -4, -3, -4, 5, -4, 0, -1,
-2, -2, 1, 6, -4, -1, 1, -2, -2, -4, -5, -1, -4, -4, -2, -1, -1, -6, -4, -4, 5, -5, 1, -1,
-1, -4, -3, -4, 9, -4, -5, -4, -4, -2, -2, -4, -2, -3, -4, -2, -1, -3, -3, -1, -4, -2, -4, -1,
-1, 1, 0, -1, -4, 6, 2, -2, 1, -3, -3, 1, 0, -4, -2, 0, -1, -3, -2, -3, 0, -3, 4, -1,
-1, -1, -1, 1, -5, 2, 6, -3, 0, -4, -4, 1, -2, -4, -2, 0, -1, -4, -3, -3, 1, -4, 5, -1,
0, -3, -1, -2, -4, -2, -3, 6, -3, -5, -4, -2, -4, -4, -3, -1, -2, -4, -4, -4, -1, -5, -3, -1,
-2, 0, 0, -2, -4, 1, 0, -3, 8, -4, -3, -1, -2, -2, -3, -1, -2, -3, 2, -4, -1, -4, 0, -1,
-2, -3, -4, -4, -2, -3, -4, -5, -4, 5, 1, -3, 1, -1, -4, -3, -1, -3, -2, 3, -4, 3, -4, -1,
-2, -3, -4, -5, -2, -3, -4, -4, -3, 1, 4, -3, 2, 0, -3, -3, -2, -2, -2, 1, -4, 3, -3, -1,
-1, 2, 0, -1, -4, 1, 1, -2, -1, -3, -3, 5, -2, -4, -1, -1, -1, -4, -3, -3, -1, -3, 1, -1,
-1, -2, -3, -4, -2, 0, -2, -4, -2, 1, 2, -2, 6, 0, -3, -2, -1, -2, -2, 1, -3, 2, -1, -1,
-3, -4, -4, -4, -3, -4, -4, -4, -2, -1, 0, -4, 0, 6, -4, -3, -2, 0, 3, -1, -4, 0, -4, -1,
-1, -2, -3, -2, -4, -2, -2, -3, -3, -4, -3, -1, -3, -4, 8, -1, -2, -5, -4, -3, -2, -4, -2, -1,
1, -1, 0, -1, -2, 0, 0, -1, -1, -3, -3, -1, -2, -3, -1, 5, 1, -4, -2, -2, 0, -3, 0, -1,
0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -2, -1, -1, -2, -2, 1, 5, -4, -2, 0, -1, -1, -1, -1,
-3, -4, -4, -6, -3, -3, -4, -4, -3, -3, -2, -4, -2, 0, -5, -4, -4, 11, 2, -3, -5, -3, -3, -1,
-2, -3, -3, -4, -3, -2, -3, -4, 2, -2, -2, -3, -2, 3, -4, -2, -2, 2, 7, -2, -3, -2, -3, -1,
0, -3, -4, -4, -1, -3, -3, -4, -4, 3, 1, -3, 1, -1, -3, -2, 0, -3, -2, 4, -4, 2, -3, -1,
-2, -1, 5, 5, -4, 0, 1, -1, -1, -4, -4, -1, -3, -4, -2, 0, -1, -5, -3, -4, 5, -4, 0, -1,
-2, -3, -4, -5, -2, -3, -4, -5, -4, 3, 3, -3, 2, 0, -4, -3, -1, -3, -2, 2, -4, 3, -3, -1,
-1, 0, 0, 1, -4, 4, 5, -3, 0, -4, -3, 1, -1, -4, -2, 0, -1, -3, -3, -3, 0, -3, 5, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]
score :: Int -> Int -> Int
score a b = blosum80 ! (a * 24 + b)
gap :: Int
gap = -6
-- Inefficient implementation
max3 a b c = max a (max b c)
align' :: [Int] -> [Int] -> Int
align' [] ys = gap * length ys
align' xs [] = gap * length xs
align' xss@(x:xs) yss@(y:ys) =
max3 (score x y + align' xs ys) (gap + align' xss ys) (gap + align' xs yss)
-- Efficient implementation
data Alignment = Match | GapX | GapY
deriving (Show)
align :: [Int] -> [Int] -> (Int, [Alignment])
align xs ys = let (value, alignment) = head (head matrix) in (value, reverse alignment)
where matrix :: [[(Int, [Alignment])]]
matrix = eachRow ys firstRow []
eachRow :: [Int] -> [(Int, [Alignment])] -> [[(Int, [Alignment])]] -> [[(Int, [Alignment])]]
eachRow [] _ row = row
eachRow (y:ys) lastRow@((s, a):rs) row = eachRow ys (reverse currentRow) (currentRow : row)
where currentRow = eachColunm y xs lastRow (s + gap, GapX : a) []
eachColunm :: Int -> [Int] -> [(Int, [Alignment])] -> (Int, [Alignment]) -> [(Int, [Alignment])] -> [(Int, [Alignment])]
eachColunm _ [] _ left colunm = left : colunm
eachColunm y (x:xs) (upperLeft:upper:lastRow) left colunm =
eachColunm y xs (upper:lastRow) bestAlignment (left : colunm)
where (ulScore, ulAlignment) = upperLeft
(lfScore, lfAlignment) = left
(upScore, upAlignment) = upper
possibleAlignments = [
(score x y + ulScore, Match : ulAlignment),
(upScore + gap, GapX : upAlignment),
(lfScore + gap, GapY : lfAlignment)]
bestAlignment = maximumBy (comparing fst) possibleAlignments
firstRow :: [(Int, [Alignment])]
firstRow = map (\x -> (x * gap, take x (repeat GapY))) [0..length xs]
-- Support Functions
alignmentToString :: (Int, [Alignment]) -> [Int] -> [Int] -> String
alignmentToString (value, as) xs ys = "x: " ++ (printX as xs) ++
"\ny: " ++ (printY as ys) ++
"\nscore: " ++ (show value)
where printX [] [] = ""
printX [] (x:xs) = (toBlosum x) : printX [] xs
printX (Match:as) (x:xs) = (toBlosum x) : printX as xs
printX (GapX:as) xs = "-" ++ printX as xs
printX (GapY:as) (x:xs) = (toBlosum x) : printX as xs
printY [] [] = ""
printY [] (y:ys) = (toBlosum y) : printY [] ys
printY (a:as) [] = "-" ++ printY as []
printY (Match:as) (y:ys) = (toBlosum y) : printY as ys
printY (GapX:as) (y:ys) = (toBlosum y) : printY as ys
printY (GapY:as) ys = "-" ++ printY as ys
seq1 = convert "HEAGAWGHEE"
seq2 = convert "PAWHEAE"
main = putStrLn $ alignmentToString (align seq1 seq2) seq1 seq2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.