Created
April 8, 2016 21:26
-
-
Save igorbonadio/bbd26f426365f3cec5e6bf590014f63f to your computer and use it in GitHub Desktop.
Needleman-Wunsch algorithm implementation in Haskell (tail recursive version)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.Array | |
import Data.List | |
import Data.Ord | |
fromBlosum :: Char -> Int | |
fromBlosum a = case elemIndex a "ARNDCQEGHILKMFPSTWYVBJZX" of | |
Just i -> i | |
Nothing -> error "Invalid amino acid" | |
toBlosum :: Int -> Char | |
toBlosum a = "ARNDCQEGHILKMFPSTWYVBJZX" !! a | |
convert :: [Char] -> [Int] | |
convert [] = [] | |
convert (x:xs) = fromBlosum x : convert xs | |
blosum80 = listArray (0, 575) [ | |
5, -2, -2, -2, -1, -1, -1, 0, -2, -2, -2, -1, -1, -3, -1, 1, 0, -3, -2, 0, -2, -2, -1, -1, | |
-2, 6, -1, -2, -4, 1, -1, -3, 0, -3, -3, 2, -2, -4, -2, -1, -1, -4, -3, -3, -1, -3, 0, -1, | |
-2, -1, 6, 1, -3, 0, -1, -1, 0, -4, -4, 0, -3, -4, -3, 0, 0, -4, -3, -4, 5, -4, 0, -1, | |
-2, -2, 1, 6, -4, -1, 1, -2, -2, -4, -5, -1, -4, -4, -2, -1, -1, -6, -4, -4, 5, -5, 1, -1, | |
-1, -4, -3, -4, 9, -4, -5, -4, -4, -2, -2, -4, -2, -3, -4, -2, -1, -3, -3, -1, -4, -2, -4, -1, | |
-1, 1, 0, -1, -4, 6, 2, -2, 1, -3, -3, 1, 0, -4, -2, 0, -1, -3, -2, -3, 0, -3, 4, -1, | |
-1, -1, -1, 1, -5, 2, 6, -3, 0, -4, -4, 1, -2, -4, -2, 0, -1, -4, -3, -3, 1, -4, 5, -1, | |
0, -3, -1, -2, -4, -2, -3, 6, -3, -5, -4, -2, -4, -4, -3, -1, -2, -4, -4, -4, -1, -5, -3, -1, | |
-2, 0, 0, -2, -4, 1, 0, -3, 8, -4, -3, -1, -2, -2, -3, -1, -2, -3, 2, -4, -1, -4, 0, -1, | |
-2, -3, -4, -4, -2, -3, -4, -5, -4, 5, 1, -3, 1, -1, -4, -3, -1, -3, -2, 3, -4, 3, -4, -1, | |
-2, -3, -4, -5, -2, -3, -4, -4, -3, 1, 4, -3, 2, 0, -3, -3, -2, -2, -2, 1, -4, 3, -3, -1, | |
-1, 2, 0, -1, -4, 1, 1, -2, -1, -3, -3, 5, -2, -4, -1, -1, -1, -4, -3, -3, -1, -3, 1, -1, | |
-1, -2, -3, -4, -2, 0, -2, -4, -2, 1, 2, -2, 6, 0, -3, -2, -1, -2, -2, 1, -3, 2, -1, -1, | |
-3, -4, -4, -4, -3, -4, -4, -4, -2, -1, 0, -4, 0, 6, -4, -3, -2, 0, 3, -1, -4, 0, -4, -1, | |
-1, -2, -3, -2, -4, -2, -2, -3, -3, -4, -3, -1, -3, -4, 8, -1, -2, -5, -4, -3, -2, -4, -2, -1, | |
1, -1, 0, -1, -2, 0, 0, -1, -1, -3, -3, -1, -2, -3, -1, 5, 1, -4, -2, -2, 0, -3, 0, -1, | |
0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -2, -1, -1, -2, -2, 1, 5, -4, -2, 0, -1, -1, -1, -1, | |
-3, -4, -4, -6, -3, -3, -4, -4, -3, -3, -2, -4, -2, 0, -5, -4, -4, 11, 2, -3, -5, -3, -3, -1, | |
-2, -3, -3, -4, -3, -2, -3, -4, 2, -2, -2, -3, -2, 3, -4, -2, -2, 2, 7, -2, -3, -2, -3, -1, | |
0, -3, -4, -4, -1, -3, -3, -4, -4, 3, 1, -3, 1, -1, -3, -2, 0, -3, -2, 4, -4, 2, -3, -1, | |
-2, -1, 5, 5, -4, 0, 1, -1, -1, -4, -4, -1, -3, -4, -2, 0, -1, -5, -3, -4, 5, -4, 0, -1, | |
-2, -3, -4, -5, -2, -3, -4, -5, -4, 3, 3, -3, 2, 0, -4, -3, -1, -3, -2, 2, -4, 3, -3, -1, | |
-1, 0, 0, 1, -4, 4, 5, -3, 0, -4, -3, 1, -1, -4, -2, 0, -1, -3, -3, -3, 0, -3, 5, -1, | |
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1] | |
score :: Int -> Int -> Int | |
score a b = blosum80 ! (a * 24 + b) | |
gap :: Int | |
gap = -6 | |
-- Inefficient implementation | |
max3 a b c = max a (max b c) | |
align' :: [Int] -> [Int] -> Int | |
align' [] ys = gap * length ys | |
align' xs [] = gap * length xs | |
align' xss@(x:xs) yss@(y:ys) = | |
max3 (score x y + align' xs ys) (gap + align' xss ys) (gap + align' xs yss) | |
-- Efficient implementation | |
data Alignment = Match | GapX | GapY | |
deriving (Show) | |
align :: [Int] -> [Int] -> (Int, [Alignment]) | |
align xs ys = let (value, alignment) = head (head matrix) in (value, reverse alignment) | |
where matrix :: [[(Int, [Alignment])]] | |
matrix = eachRow ys firstRow [] | |
eachRow :: [Int] -> [(Int, [Alignment])] -> [[(Int, [Alignment])]] -> [[(Int, [Alignment])]] | |
eachRow [] _ row = row | |
eachRow (y:ys) lastRow@((s, a):rs) row = eachRow ys (reverse currentRow) (currentRow : row) | |
where currentRow = eachColunm y xs lastRow (s + gap, GapX : a) [] | |
eachColunm :: Int -> [Int] -> [(Int, [Alignment])] -> (Int, [Alignment]) -> [(Int, [Alignment])] -> [(Int, [Alignment])] | |
eachColunm _ [] _ left colunm = left : colunm | |
eachColunm y (x:xs) (upperLeft:upper:lastRow) left colunm = | |
eachColunm y xs (upper:lastRow) bestAlignment (left : colunm) | |
where (ulScore, ulAlignment) = upperLeft | |
(lfScore, lfAlignment) = left | |
(upScore, upAlignment) = upper | |
possibleAlignments = [ | |
(score x y + ulScore, Match : ulAlignment), | |
(upScore + gap, GapX : upAlignment), | |
(lfScore + gap, GapY : lfAlignment)] | |
bestAlignment = maximumBy (comparing fst) possibleAlignments | |
firstRow :: [(Int, [Alignment])] | |
firstRow = map (\x -> (x * gap, take x (repeat GapY))) [0..length xs] | |
-- Support Functions | |
alignmentToString :: (Int, [Alignment]) -> [Int] -> [Int] -> String | |
alignmentToString (value, as) xs ys = "x: " ++ (printX as xs) ++ | |
"\ny: " ++ (printY as ys) ++ | |
"\nscore: " ++ (show value) | |
where printX [] [] = "" | |
printX [] (x:xs) = (toBlosum x) : printX [] xs | |
printX (Match:as) (x:xs) = (toBlosum x) : printX as xs | |
printX (GapX:as) xs = "-" ++ printX as xs | |
printX (GapY:as) (x:xs) = (toBlosum x) : printX as xs | |
printY [] [] = "" | |
printY [] (y:ys) = (toBlosum y) : printY [] ys | |
printY (a:as) [] = "-" ++ printY as [] | |
printY (Match:as) (y:ys) = (toBlosum y) : printY as ys | |
printY (GapX:as) (y:ys) = (toBlosum y) : printY as ys | |
printY (GapY:as) ys = "-" ++ printY as ys | |
seq1 = convert "HEAGAWGHEE" | |
seq2 = convert "PAWHEAE" | |
main = putStrLn $ alignmentToString (align seq1 seq2) seq1 seq2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment