igorbonadio/alignment2.hs

## alignment2.hs
import Data.Array
import Data.List
import Data.Ord

fromBlosum :: Char -> Int
fromBlosum a = case elemIndex a "ARNDCQEGHILKMFPSTWYVBJZX" of
  Just i -> i
  Nothing -> error "Invalid amino acid"

toBlosum :: Int -> Char
toBlosum a = "ARNDCQEGHILKMFPSTWYVBJZX" !! a

convert :: [Char] -> [Int]
convert [] = []
convert (x:xs) = fromBlosum x : convert xs

blosum80 = listArray (0, 575) [
   5, -2, -2, -2, -1, -1, -1,  0, -2, -2, -2, -1, -1, -3, -1,  1,  0, -3, -2,  0, -2, -2, -1, -1,
  -2,  6, -1, -2, -4,  1, -1, -3,  0, -3, -3,  2, -2, -4, -2, -1, -1, -4, -3, -3, -1, -3,  0, -1,
  -2, -1,  6,  1, -3,  0, -1, -1,  0, -4, -4,  0, -3, -4, -3,  0,  0, -4, -3, -4,  5, -4,  0, -1,
  -2, -2,  1,  6, -4, -1,  1, -2, -2, -4, -5, -1, -4, -4, -2, -1, -1, -6, -4, -4,  5, -5,  1, -1,
  -1, -4, -3, -4,  9, -4, -5, -4, -4, -2, -2, -4, -2, -3, -4, -2, -1, -3, -3, -1, -4, -2, -4, -1,
  -1,  1,  0, -1, -4,  6,  2, -2,  1, -3, -3,  1,  0, -4, -2,  0, -1, -3, -2, -3,  0, -3,  4, -1,
  -1, -1, -1,  1, -5,  2,  6, -3,  0, -4, -4,  1, -2, -4, -2,  0, -1, -4, -3, -3,  1, -4,  5, -1,
   0, -3, -1, -2, -4, -2, -3,  6, -3, -5, -4, -2, -4, -4, -3, -1, -2, -4, -4, -4, -1, -5, -3, -1,
  -2,  0,  0, -2, -4,  1,  0, -3,  8, -4, -3, -1, -2, -2, -3, -1, -2, -3,  2, -4, -1, -4,  0, -1,
  -2, -3, -4, -4, -2, -3, -4, -5, -4,  5,  1, -3,  1, -1, -4, -3, -1, -3, -2,  3, -4,  3, -4, -1,
  -2, -3, -4, -5, -2, -3, -4, -4, -3,  1,  4, -3,  2,  0, -3, -3, -2, -2, -2,  1, -4,  3, -3, -1,
  -1,  2,  0, -1, -4,  1,  1, -2, -1, -3, -3,  5, -2, -4, -1, -1, -1, -4, -3, -3, -1, -3,  1, -1,
  -1, -2, -3, -4, -2,  0, -2, -4, -2,  1,  2, -2,  6,  0, -3, -2, -1, -2, -2,  1, -3,  2, -1, -1,
  -3, -4, -4, -4, -3, -4, -4, -4, -2, -1,  0, -4,  0,  6, -4, -3, -2,  0,  3, -1, -4,  0, -4, -1,
  -1, -2, -3, -2, -4, -2, -2, -3, -3, -4, -3, -1, -3, -4,  8, -1, -2, -5, -4, -3, -2, -4, -2, -1,
   1, -1,  0, -1, -2,  0,  0, -1, -1, -3, -3, -1, -2, -3, -1,  5,  1, -4, -2, -2,  0, -3,  0, -1,
   0, -1,  0, -1, -1, -1, -1, -2, -2, -1, -2, -1, -1, -2, -2,  1,  5, -4, -2,  0, -1, -1, -1, -1,
  -3, -4, -4, -6, -3, -3, -4, -4, -3, -3, -2, -4, -2,  0, -5, -4, -4, 11,  2, -3, -5, -3, -3, -1,
  -2, -3, -3, -4, -3, -2, -3, -4,  2, -2, -2, -3, -2,  3, -4, -2, -2,  2,  7, -2, -3, -2, -3, -1,
   0, -3, -4, -4, -1, -3, -3, -4, -4,  3,  1, -3,  1, -1, -3, -2,  0, -3, -2,  4, -4,  2, -3, -1,
  -2, -1,  5,  5, -4,  0,  1, -1, -1, -4, -4, -1, -3, -4, -2,  0, -1, -5, -3, -4,  5, -4,  0, -1,
  -2, -3, -4, -5, -2, -3, -4, -5, -4,  3,  3, -3,  2,  0, -4, -3, -1, -3, -2,  2, -4,  3, -3, -1,
  -1,  0,  0,  1, -4,  4,  5, -3,  0, -4, -3,  1, -1, -4, -2,  0, -1, -3, -3, -3,  0, -3,  5, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]

score :: Int -> Int -> Int
score a b = blosum80 ! (a * 24 + b)

gap :: Int
gap = -6

-- Inefficient implementation

max3 a b c = max a (max b c)

align' :: [Int] -> [Int] -> Int
align' [] ys = gap * length ys
align' xs [] = gap * length xs
align' xss@(x:xs) yss@(y:ys) =
  max3 (score x y + align' xs ys) (gap + align' xss ys) (gap + align' xs yss)

-- Efficient implementation

data Alignment = Match | GapX | GapY
  deriving (Show)

align :: [Int] -> [Int] -> (Int, [Alignment])
align xs ys = let (value, alignment) = head (head matrix) in (value, reverse alignment)
  where matrix :: [[(Int, [Alignment])]]
        matrix = eachRow ys firstRow []

        eachRow :: [Int] -> [(Int, [Alignment])] -> [[(Int, [Alignment])]] -> [[(Int, [Alignment])]]
        eachRow [] _ row = row
        eachRow (y:ys) lastRow@((s, a):rs) row = eachRow ys (reverse currentRow) (currentRow : row)
          where currentRow = eachColunm y xs lastRow (s + gap, GapX : a) []

        eachColunm :: Int -> [Int] -> [(Int, [Alignment])] -> (Int, [Alignment]) -> [(Int, [Alignment])] -> [(Int, [Alignment])]
        eachColunm _ [] _ left colunm = left : colunm
        eachColunm y (x:xs) (upperLeft:upper:lastRow) left colunm =
          eachColunm y xs (upper:lastRow) bestAlignment (left : colunm)
            where (ulScore, ulAlignment) = upperLeft
                  (lfScore, lfAlignment) = left
                  (upScore, upAlignment) = upper
                  possibleAlignments = [
                    (score x y + ulScore, Match : ulAlignment),
                    (upScore + gap, GapX : upAlignment),
                    (lfScore + gap, GapY : lfAlignment)]
                  bestAlignment = maximumBy (comparing fst) possibleAlignments

        firstRow :: [(Int, [Alignment])]
        firstRow = map (\x -> (x * gap, take x (repeat GapY))) [0..length xs]

-- Support Functions

alignmentToString :: (Int, [Alignment]) -> [Int] -> [Int] -> String
alignmentToString (value, as) xs ys = "x: " ++ (printX as xs) ++
                                      "\ny: " ++ (printY as ys) ++
                                      "\nscore: " ++ (show value)
  where printX [] [] = ""
        printX [] (x:xs) = (toBlosum x) : printX [] xs
        printX (Match:as) (x:xs) = (toBlosum x) : printX as xs
        printX (GapX:as) xs = "-" ++ printX as xs
        printX (GapY:as) (x:xs) = (toBlosum x) : printX as xs

        printY [] [] = ""
        printY [] (y:ys) = (toBlosum y) : printY [] ys
        printY (a:as) [] = "-" ++ printY as []
        printY (Match:as) (y:ys) = (toBlosum y) : printY as ys
        printY (GapX:as) (y:ys) = (toBlosum y) : printY as ys
        printY (GapY:as) ys = "-" ++ printY as ys

seq1 = convert "HEAGAWGHEE"

seq2 = convert "PAWHEAE"

main = putStrLn $ alignmentToString (align seq1 seq2) seq1 seq2
	import Data.Array
	import Data.List
	import Data.Ord

	fromBlosum :: Char -> Int
	fromBlosum a = case elemIndex a "ARNDCQEGHILKMFPSTWYVBJZX" of
	Just i -> i
	Nothing -> error "Invalid amino acid"

	toBlosum :: Int -> Char
	toBlosum a = "ARNDCQEGHILKMFPSTWYVBJZX" !! a

	convert :: [Char] -> [Int]
	convert [] = []
	convert (x:xs) = fromBlosum x : convert xs

	blosum80 = listArray (0, 575) [
	5, -2, -2, -2, -1, -1, -1, 0, -2, -2, -2, -1, -1, -3, -1, 1, 0, -3, -2, 0, -2, -2, -1, -1,
	-2, 6, -1, -2, -4, 1, -1, -3, 0, -3, -3, 2, -2, -4, -2, -1, -1, -4, -3, -3, -1, -3, 0, -1,
	-2, -1, 6, 1, -3, 0, -1, -1, 0, -4, -4, 0, -3, -4, -3, 0, 0, -4, -3, -4, 5, -4, 0, -1,
	-2, -2, 1, 6, -4, -1, 1, -2, -2, -4, -5, -1, -4, -4, -2, -1, -1, -6, -4, -4, 5, -5, 1, -1,
	-1, -4, -3, -4, 9, -4, -5, -4, -4, -2, -2, -4, -2, -3, -4, -2, -1, -3, -3, -1, -4, -2, -4, -1,
	-1, 1, 0, -1, -4, 6, 2, -2, 1, -3, -3, 1, 0, -4, -2, 0, -1, -3, -2, -3, 0, -3, 4, -1,
	-1, -1, -1, 1, -5, 2, 6, -3, 0, -4, -4, 1, -2, -4, -2, 0, -1, -4, -3, -3, 1, -4, 5, -1,
	0, -3, -1, -2, -4, -2, -3, 6, -3, -5, -4, -2, -4, -4, -3, -1, -2, -4, -4, -4, -1, -5, -3, -1,
	-2, 0, 0, -2, -4, 1, 0, -3, 8, -4, -3, -1, -2, -2, -3, -1, -2, -3, 2, -4, -1, -4, 0, -1,
	-2, -3, -4, -4, -2, -3, -4, -5, -4, 5, 1, -3, 1, -1, -4, -3, -1, -3, -2, 3, -4, 3, -4, -1,
	-2, -3, -4, -5, -2, -3, -4, -4, -3, 1, 4, -3, 2, 0, -3, -3, -2, -2, -2, 1, -4, 3, -3, -1,
	-1, 2, 0, -1, -4, 1, 1, -2, -1, -3, -3, 5, -2, -4, -1, -1, -1, -4, -3, -3, -1, -3, 1, -1,
	-1, -2, -3, -4, -2, 0, -2, -4, -2, 1, 2, -2, 6, 0, -3, -2, -1, -2, -2, 1, -3, 2, -1, -1,
	-3, -4, -4, -4, -3, -4, -4, -4, -2, -1, 0, -4, 0, 6, -4, -3, -2, 0, 3, -1, -4, 0, -4, -1,
	-1, -2, -3, -2, -4, -2, -2, -3, -3, -4, -3, -1, -3, -4, 8, -1, -2, -5, -4, -3, -2, -4, -2, -1,
	1, -1, 0, -1, -2, 0, 0, -1, -1, -3, -3, -1, -2, -3, -1, 5, 1, -4, -2, -2, 0, -3, 0, -1,
	0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -2, -1, -1, -2, -2, 1, 5, -4, -2, 0, -1, -1, -1, -1,
	-3, -4, -4, -6, -3, -3, -4, -4, -3, -3, -2, -4, -2, 0, -5, -4, -4, 11, 2, -3, -5, -3, -3, -1,
	-2, -3, -3, -4, -3, -2, -3, -4, 2, -2, -2, -3, -2, 3, -4, -2, -2, 2, 7, -2, -3, -2, -3, -1,
	0, -3, -4, -4, -1, -3, -3, -4, -4, 3, 1, -3, 1, -1, -3, -2, 0, -3, -2, 4, -4, 2, -3, -1,
	-2, -1, 5, 5, -4, 0, 1, -1, -1, -4, -4, -1, -3, -4, -2, 0, -1, -5, -3, -4, 5, -4, 0, -1,
	-2, -3, -4, -5, -2, -3, -4, -5, -4, 3, 3, -3, 2, 0, -4, -3, -1, -3, -2, 2, -4, 3, -3, -1,
	-1, 0, 0, 1, -4, 4, 5, -3, 0, -4, -3, 1, -1, -4, -2, 0, -1, -3, -3, -3, 0, -3, 5, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]

	score :: Int -> Int -> Int
	score a b = blosum80 ! (a * 24 + b)

	gap :: Int
	gap = -6

	-- Inefficient implementation

	max3 a b c = max a (max b c)

	align' :: [Int] -> [Int] -> Int
	align' [] ys = gap * length ys
	align' xs [] = gap * length xs
	align' xss@(x:xs) yss@(y:ys) =
	max3 (score x y + align' xs ys) (gap + align' xss ys) (gap + align' xs yss)

	-- Efficient implementation

	data Alignment = Match \| GapX \| GapY
	deriving (Show)

	align :: [Int] -> [Int] -> (Int, [Alignment])
	align xs ys = let (value, alignment) = head (head matrix) in (value, reverse alignment)
	where matrix :: [[(Int, [Alignment])]]
	matrix = eachRow ys firstRow []

	eachRow :: [Int] -> [(Int, [Alignment])] -> [[(Int, [Alignment])]] -> [[(Int, [Alignment])]]
	eachRow [] _ row = row
	eachRow (y:ys) lastRow@((s, a):rs) row = eachRow ys (reverse currentRow) (currentRow : row)
	where currentRow = eachColunm y xs lastRow (s + gap, GapX : a) []

	eachColunm :: Int -> [Int] -> [(Int, [Alignment])] -> (Int, [Alignment]) -> [(Int, [Alignment])] -> [(Int, [Alignment])]
	eachColunm _ [] _ left colunm = left : colunm
	eachColunm y (x:xs) (upperLeft:upper:lastRow) left colunm =
	eachColunm y xs (upper:lastRow) bestAlignment (left : colunm)
	where (ulScore, ulAlignment) = upperLeft
	(lfScore, lfAlignment) = left
	(upScore, upAlignment) = upper
	possibleAlignments = [
	(score x y + ulScore, Match : ulAlignment),
	(upScore + gap, GapX : upAlignment),
	(lfScore + gap, GapY : lfAlignment)]
	bestAlignment = maximumBy (comparing fst) possibleAlignments

	firstRow :: [(Int, [Alignment])]
	firstRow = map (\x -> (x * gap, take x (repeat GapY))) [0..length xs]

	-- Support Functions

	alignmentToString :: (Int, [Alignment]) -> [Int] -> [Int] -> String
	alignmentToString (value, as) xs ys = "x: " ++ (printX as xs) ++
	"\ny: " ++ (printY as ys) ++
	"\nscore: " ++ (show value)
	where printX [] [] = ""
	printX [] (x:xs) = (toBlosum x) : printX [] xs
	printX (Match:as) (x:xs) = (toBlosum x) : printX as xs
	printX (GapX:as) xs = "-" ++ printX as xs
	printX (GapY:as) (x:xs) = (toBlosum x) : printX as xs

	printY [] [] = ""
	printY [] (y:ys) = (toBlosum y) : printY [] ys
	printY (a:as) [] = "-" ++ printY as []
	printY (Match:as) (y:ys) = (toBlosum y) : printY as ys
	printY (GapX:as) (y:ys) = (toBlosum y) : printY as ys
	printY (GapY:as) ys = "-" ++ printY as ys

	seq1 = convert "HEAGAWGHEE"

	seq2 = convert "PAWHEAE"

	main = putStrLn $ alignmentToString (align seq1 seq2) seq1 seq2