Skip to content

Instantly share code, notes, and snippets.

@igorbonadio
Created April 8, 2016 21:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save igorbonadio/bbd26f426365f3cec5e6bf590014f63f to your computer and use it in GitHub Desktop.
Save igorbonadio/bbd26f426365f3cec5e6bf590014f63f to your computer and use it in GitHub Desktop.
Needleman-Wunsch algorithm implementation in Haskell (tail recursive version)
import Data.Array
import Data.List
import Data.Ord
fromBlosum :: Char -> Int
fromBlosum a = case elemIndex a "ARNDCQEGHILKMFPSTWYVBJZX" of
Just i -> i
Nothing -> error "Invalid amino acid"
toBlosum :: Int -> Char
toBlosum a = "ARNDCQEGHILKMFPSTWYVBJZX" !! a
convert :: [Char] -> [Int]
convert [] = []
convert (x:xs) = fromBlosum x : convert xs
blosum80 = listArray (0, 575) [
5, -2, -2, -2, -1, -1, -1, 0, -2, -2, -2, -1, -1, -3, -1, 1, 0, -3, -2, 0, -2, -2, -1, -1,
-2, 6, -1, -2, -4, 1, -1, -3, 0, -3, -3, 2, -2, -4, -2, -1, -1, -4, -3, -3, -1, -3, 0, -1,
-2, -1, 6, 1, -3, 0, -1, -1, 0, -4, -4, 0, -3, -4, -3, 0, 0, -4, -3, -4, 5, -4, 0, -1,
-2, -2, 1, 6, -4, -1, 1, -2, -2, -4, -5, -1, -4, -4, -2, -1, -1, -6, -4, -4, 5, -5, 1, -1,
-1, -4, -3, -4, 9, -4, -5, -4, -4, -2, -2, -4, -2, -3, -4, -2, -1, -3, -3, -1, -4, -2, -4, -1,
-1, 1, 0, -1, -4, 6, 2, -2, 1, -3, -3, 1, 0, -4, -2, 0, -1, -3, -2, -3, 0, -3, 4, -1,
-1, -1, -1, 1, -5, 2, 6, -3, 0, -4, -4, 1, -2, -4, -2, 0, -1, -4, -3, -3, 1, -4, 5, -1,
0, -3, -1, -2, -4, -2, -3, 6, -3, -5, -4, -2, -4, -4, -3, -1, -2, -4, -4, -4, -1, -5, -3, -1,
-2, 0, 0, -2, -4, 1, 0, -3, 8, -4, -3, -1, -2, -2, -3, -1, -2, -3, 2, -4, -1, -4, 0, -1,
-2, -3, -4, -4, -2, -3, -4, -5, -4, 5, 1, -3, 1, -1, -4, -3, -1, -3, -2, 3, -4, 3, -4, -1,
-2, -3, -4, -5, -2, -3, -4, -4, -3, 1, 4, -3, 2, 0, -3, -3, -2, -2, -2, 1, -4, 3, -3, -1,
-1, 2, 0, -1, -4, 1, 1, -2, -1, -3, -3, 5, -2, -4, -1, -1, -1, -4, -3, -3, -1, -3, 1, -1,
-1, -2, -3, -4, -2, 0, -2, -4, -2, 1, 2, -2, 6, 0, -3, -2, -1, -2, -2, 1, -3, 2, -1, -1,
-3, -4, -4, -4, -3, -4, -4, -4, -2, -1, 0, -4, 0, 6, -4, -3, -2, 0, 3, -1, -4, 0, -4, -1,
-1, -2, -3, -2, -4, -2, -2, -3, -3, -4, -3, -1, -3, -4, 8, -1, -2, -5, -4, -3, -2, -4, -2, -1,
1, -1, 0, -1, -2, 0, 0, -1, -1, -3, -3, -1, -2, -3, -1, 5, 1, -4, -2, -2, 0, -3, 0, -1,
0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -2, -1, -1, -2, -2, 1, 5, -4, -2, 0, -1, -1, -1, -1,
-3, -4, -4, -6, -3, -3, -4, -4, -3, -3, -2, -4, -2, 0, -5, -4, -4, 11, 2, -3, -5, -3, -3, -1,
-2, -3, -3, -4, -3, -2, -3, -4, 2, -2, -2, -3, -2, 3, -4, -2, -2, 2, 7, -2, -3, -2, -3, -1,
0, -3, -4, -4, -1, -3, -3, -4, -4, 3, 1, -3, 1, -1, -3, -2, 0, -3, -2, 4, -4, 2, -3, -1,
-2, -1, 5, 5, -4, 0, 1, -1, -1, -4, -4, -1, -3, -4, -2, 0, -1, -5, -3, -4, 5, -4, 0, -1,
-2, -3, -4, -5, -2, -3, -4, -5, -4, 3, 3, -3, 2, 0, -4, -3, -1, -3, -2, 2, -4, 3, -3, -1,
-1, 0, 0, 1, -4, 4, 5, -3, 0, -4, -3, 1, -1, -4, -2, 0, -1, -3, -3, -3, 0, -3, 5, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]
score :: Int -> Int -> Int
score a b = blosum80 ! (a * 24 + b)
gap :: Int
gap = -6
-- Inefficient implementation
max3 a b c = max a (max b c)
align' :: [Int] -> [Int] -> Int
align' [] ys = gap * length ys
align' xs [] = gap * length xs
align' xss@(x:xs) yss@(y:ys) =
max3 (score x y + align' xs ys) (gap + align' xss ys) (gap + align' xs yss)
-- Efficient implementation
data Alignment = Match | GapX | GapY
deriving (Show)
align :: [Int] -> [Int] -> (Int, [Alignment])
align xs ys = let (value, alignment) = head (head matrix) in (value, reverse alignment)
where matrix :: [[(Int, [Alignment])]]
matrix = eachRow ys firstRow []
eachRow :: [Int] -> [(Int, [Alignment])] -> [[(Int, [Alignment])]] -> [[(Int, [Alignment])]]
eachRow [] _ row = row
eachRow (y:ys) lastRow@((s, a):rs) row = eachRow ys (reverse currentRow) (currentRow : row)
where currentRow = eachColunm y xs lastRow (s + gap, GapX : a) []
eachColunm :: Int -> [Int] -> [(Int, [Alignment])] -> (Int, [Alignment]) -> [(Int, [Alignment])] -> [(Int, [Alignment])]
eachColunm _ [] _ left colunm = left : colunm
eachColunm y (x:xs) (upperLeft:upper:lastRow) left colunm =
eachColunm y xs (upper:lastRow) bestAlignment (left : colunm)
where (ulScore, ulAlignment) = upperLeft
(lfScore, lfAlignment) = left
(upScore, upAlignment) = upper
possibleAlignments = [
(score x y + ulScore, Match : ulAlignment),
(upScore + gap, GapX : upAlignment),
(lfScore + gap, GapY : lfAlignment)]
bestAlignment = maximumBy (comparing fst) possibleAlignments
firstRow :: [(Int, [Alignment])]
firstRow = map (\x -> (x * gap, take x (repeat GapY))) [0..length xs]
-- Support Functions
alignmentToString :: (Int, [Alignment]) -> [Int] -> [Int] -> String
alignmentToString (value, as) xs ys = "x: " ++ (printX as xs) ++
"\ny: " ++ (printY as ys) ++
"\nscore: " ++ (show value)
where printX [] [] = ""
printX [] (x:xs) = (toBlosum x) : printX [] xs
printX (Match:as) (x:xs) = (toBlosum x) : printX as xs
printX (GapX:as) xs = "-" ++ printX as xs
printX (GapY:as) (x:xs) = (toBlosum x) : printX as xs
printY [] [] = ""
printY [] (y:ys) = (toBlosum y) : printY [] ys
printY (a:as) [] = "-" ++ printY as []
printY (Match:as) (y:ys) = (toBlosum y) : printY as ys
printY (GapX:as) (y:ys) = (toBlosum y) : printY as ys
printY (GapY:as) ys = "-" ++ printY as ys
seq1 = convert "HEAGAWGHEE"
seq2 = convert "PAWHEAE"
main = putStrLn $ alignmentToString (align seq1 seq2) seq1 seq2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment