Created
April 8, 2016 14:06
-
-
Save igorbonadio/def8416920c83b5bd7dfb2ea88f67013 to your computer and use it in GitHub Desktop.
Needleman-Wunsch algorithm implementation in Haskell
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.Array | |
import Data.List | |
import Data.Ord | |
fromBlosum :: Char -> Int | |
fromBlosum a = case elemIndex a "ARNDCQEGHILKMFPSTWYVBJZX" of | |
Just i -> i | |
Nothing -> error "Invalid amino acid" | |
toBlosum :: Int -> Char | |
toBlosum a = "ARNDCQEGHILKMFPSTWYVBJZX" !! a | |
convert :: [Char] -> [Int] | |
convert [] = [] | |
convert (x:xs) = fromBlosum x : convert xs | |
blosum80 = listArray (0, 575) [ | |
5, -2, -2, -2, -1, -1, -1, 0, -2, -2, -2, -1, -1, -3, -1, 1, 0, -3, -2, 0, -2, -2, -1, -1, | |
-2, 6, -1, -2, -4, 1, -1, -3, 0, -3, -3, 2, -2, -4, -2, -1, -1, -4, -3, -3, -1, -3, 0, -1, | |
-2, -1, 6, 1, -3, 0, -1, -1, 0, -4, -4, 0, -3, -4, -3, 0, 0, -4, -3, -4, 5, -4, 0, -1, | |
-2, -2, 1, 6, -4, -1, 1, -2, -2, -4, -5, -1, -4, -4, -2, -1, -1, -6, -4, -4, 5, -5, 1, -1, | |
-1, -4, -3, -4, 9, -4, -5, -4, -4, -2, -2, -4, -2, -3, -4, -2, -1, -3, -3, -1, -4, -2, -4, -1, | |
-1, 1, 0, -1, -4, 6, 2, -2, 1, -3, -3, 1, 0, -4, -2, 0, -1, -3, -2, -3, 0, -3, 4, -1, | |
-1, -1, -1, 1, -5, 2, 6, -3, 0, -4, -4, 1, -2, -4, -2, 0, -1, -4, -3, -3, 1, -4, 5, -1, | |
0, -3, -1, -2, -4, -2, -3, 6, -3, -5, -4, -2, -4, -4, -3, -1, -2, -4, -4, -4, -1, -5, -3, -1, | |
-2, 0, 0, -2, -4, 1, 0, -3, 8, -4, -3, -1, -2, -2, -3, -1, -2, -3, 2, -4, -1, -4, 0, -1, | |
-2, -3, -4, -4, -2, -3, -4, -5, -4, 5, 1, -3, 1, -1, -4, -3, -1, -3, -2, 3, -4, 3, -4, -1, | |
-2, -3, -4, -5, -2, -3, -4, -4, -3, 1, 4, -3, 2, 0, -3, -3, -2, -2, -2, 1, -4, 3, -3, -1, | |
-1, 2, 0, -1, -4, 1, 1, -2, -1, -3, -3, 5, -2, -4, -1, -1, -1, -4, -3, -3, -1, -3, 1, -1, | |
-1, -2, -3, -4, -2, 0, -2, -4, -2, 1, 2, -2, 6, 0, -3, -2, -1, -2, -2, 1, -3, 2, -1, -1, | |
-3, -4, -4, -4, -3, -4, -4, -4, -2, -1, 0, -4, 0, 6, -4, -3, -2, 0, 3, -1, -4, 0, -4, -1, | |
-1, -2, -3, -2, -4, -2, -2, -3, -3, -4, -3, -1, -3, -4, 8, -1, -2, -5, -4, -3, -2, -4, -2, -1, | |
1, -1, 0, -1, -2, 0, 0, -1, -1, -3, -3, -1, -2, -3, -1, 5, 1, -4, -2, -2, 0, -3, 0, -1, | |
0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -2, -1, -1, -2, -2, 1, 5, -4, -2, 0, -1, -1, -1, -1, | |
-3, -4, -4, -6, -3, -3, -4, -4, -3, -3, -2, -4, -2, 0, -5, -4, -4, 11, 2, -3, -5, -3, -3, -1, | |
-2, -3, -3, -4, -3, -2, -3, -4, 2, -2, -2, -3, -2, 3, -4, -2, -2, 2, 7, -2, -3, -2, -3, -1, | |
0, -3, -4, -4, -1, -3, -3, -4, -4, 3, 1, -3, 1, -1, -3, -2, 0, -3, -2, 4, -4, 2, -3, -1, | |
-2, -1, 5, 5, -4, 0, 1, -1, -1, -4, -4, -1, -3, -4, -2, 0, -1, -5, -3, -4, 5, -4, 0, -1, | |
-2, -3, -4, -5, -2, -3, -4, -5, -4, 3, 3, -3, 2, 0, -4, -3, -1, -3, -2, 2, -4, 3, -3, -1, | |
-1, 0, 0, 1, -4, 4, 5, -3, 0, -4, -3, 1, -1, -4, -2, 0, -1, -3, -3, -3, 0, -3, 5, -1, | |
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1] | |
score :: Int -> Int -> Int | |
score a b = blosum80 ! (a * 24 + b) | |
gap :: Int | |
gap = -6 | |
-- Inefficient implementation | |
max3 a b c = max a (max b c) | |
align' :: [Int] -> [Int] -> Int | |
align' [] ys = gap * length ys | |
align' xs [] = gap * length xs | |
align' xss@(x:xs) yss@(y:ys) = | |
max3 (score x y + align' xs ys) (gap + align' xss ys) (gap + align' xs yss) | |
-- Efficient implementation | |
data Alignment = Match | GapX | GapY | |
deriving (Show) | |
align :: [Int] -> [Int] -> (Int, [Alignment]) | |
align xs ys = let (value, alignment) = last (last matrix) in (value, reverse alignment) | |
where matrix :: [[(Int, [Alignment])]] | |
matrix = firstRow : (eachRow ys firstRow) | |
eachRow :: [Int] -> [(Int, [Alignment])] -> [[(Int, [Alignment])]] | |
eachRow [] _ = [] | |
eachRow (y:ys) lastRow@((s, a):rs) = currentRow : (eachRow ys currentRow) | |
where currentRow = (eachColunm y xs lastRow (s + gap, GapX : a)) | |
eachColunm :: Int -> [Int] -> [(Int, [Alignment])] -> (Int, [Alignment]) -> [(Int, [Alignment])] | |
eachColunm _ [] _ left = [left] | |
eachColunm y (x:xs) (upperLeft:upper:lastRow) left = | |
left : (eachColunm y xs (upper:lastRow) bestAlignment) | |
where (ulScore, ulAlignment) = upperLeft | |
(lfScore, lfAlignment) = left | |
(upScore, upAlignment) = upper | |
possibleAlignments = [ | |
(score x y + ulScore, Match : ulAlignment), | |
(upScore + gap, GapX : upAlignment), | |
(lfScore + gap, GapY : lfAlignment)] | |
bestAlignment = maximumBy (comparing fst) possibleAlignments | |
firstRow :: [(Int, [Alignment])] | |
firstRow = map (\x -> (x * gap, take x (repeat GapY))) [0..length xs] | |
-- Support Functions | |
alignmentToString :: (Int, [Alignment]) -> [Int] -> [Int] -> String | |
alignmentToString (value, as) xs ys = "x: " ++ (printX as xs) ++ | |
"\ny: " ++ (printY as ys) ++ | |
"\nscore: " ++ (show value) | |
where printX [] [] = "" | |
printX [] (x:xs) = (toBlosum x) : printX [] xs | |
printX (Match:as) (x:xs) = (toBlosum x) : printX as xs | |
printX (GapX:as) xs = "-" ++ printX as xs | |
printX (GapY:as) (x:xs) = (toBlosum x) : printX as xs | |
printY [] [] = "" | |
printY [] (y:ys) = (toBlosum y) : printY [] ys | |
printY (a:as) [] = "-" ++ printY as [] | |
printY (Match:as) (y:ys) = (toBlosum y) : printY as ys | |
printY (GapX:as) (y:ys) = (toBlosum y) : printY as ys | |
printY (GapY:as) ys = "-" ++ printY as ys | |
seq1 = convert "HEAGAWGHEE" | |
seq2 = convert "PAWHEAE" | |
main = putStrLn $ alignmentToString (align seq1 seq2) seq1 seq2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment