Created
April 18, 2015 21:16
-
-
Save xdl/b5d1f8e17174b49ec523 to your computer and use it in GitHub Desktop.
Global Alignment
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module GlobalAlignmentProblem( | |
solve | |
) where | |
import Data.Array | |
import qualified Data.Map as M | |
data Input = Input {string1 :: String, | |
string2 :: String | |
} | |
deriving Show | |
data Pointer = Unoccupied | DEL | INS | SCORE deriving (Show, Eq) | |
defaultInput :: Input | |
defaultInput = Input "PLEASANTLY" "MEANLY" | |
solve :: [String] -> IO () | |
solve [] = print $ globalAlignmentProblem $ defaultInput | |
solve [inputFile] = do | |
input <- parseFile inputFile | |
print $ globalAlignmentProblem $ input | |
solve [inputFile, outputFile] = do | |
input <- parseFile inputFile | |
let (num, (one, two)) = globalAlignmentProblem input | |
writeFile outputFile $ show num | |
appendFile outputFile "\n" | |
appendFile outputFile $ one | |
appendFile outputFile "\n" | |
appendFile outputFile $ two | |
parseFile :: String -> IO Input | |
parseFile file = do | |
contents <- readFile file | |
let [str1,str2] = lines contents | |
return $ Input str1 str2 | |
{-globalAlignmentProblem :: Input -> IO (Int, (String, String))-} | |
{-globalAlignmentProblem input = do-} | |
{-print str1-} | |
{-print str2-} | |
{-print $ matrix!0-} | |
{-print matrix-} | |
{-print backtrack-} | |
{-print $ outputLCS backtrack ("","") (str1, str2) len1 len2-} | |
{-return (1, ("asd", "asd"))-} | |
globalAlignmentProblem :: Input -> (Int, (String, String)) | |
globalAlignmentProblem input = ((matrix!len1)!len2, outputLCS backtrack ("","") (str1, str2) len1 len2) | |
where | |
str1 = string1 input | |
str2 = string2 input | |
len1 = length str1 | |
len2 = length str2 | |
matrix = listArray (0, len1) (map populateMatrixRow [0..len1]) | |
populateMatrixRow i = listArray (0, len2) [fillCell i j | j <- [0.. len2]] | |
fillCell i j | |
| i == 0 = if j == 0 then 0 else (-5)*j | |
| j == 0 = if i == 0 then 0 else (-5)*i | |
| otherwise = maximum [matrix!(i-1)!j - 5, (matrix!i)!(j-1) - 5, (matrix!(i-1))!(j-1) + blosum62 (str1!!(i-1), str2!!(j-1))] | |
backtrack = listArray (0, len1) (map populateBacktrackRow [0.. len1]) | |
populateBacktrackRow i = listArray(0, len2) [fillBacktrack i j | j <- [0.. len2]] | |
fillBacktrack i j | |
| i == 0 = if j == 0 then Unoccupied else INS | |
| j == 0 = if i == 0 then Unoccupied else DEL | |
| (matrix!i)!j == (matrix!(i-1))!j - 5 = DEL | |
| (matrix!i)!j == (matrix!i)!(j-1) - 5 = INS | |
| (matrix!i)!j == ((matrix!(i-1))!(j-1) + blosum62 (str1!!(i-1), str2!!(j-1))) = SCORE | |
| otherwise = Unoccupied | |
outputLCS :: Array Int (Array Int Pointer) -> (String, String) -> (String, String) -> Int -> Int -> (String, String) | |
outputLCS backtrack alignment v i j | |
| i == 0 && j == 0 = alignment | |
| (backtrack!i)!j == DEL = outputLCS backtrack ((str1!!(i-1)):curr1, '-':curr2) v (i-1) j | |
| (backtrack!i)!j == INS = outputLCS backtrack ('-':curr1, str2!!(j-1):curr2) v i (j-1) | |
| (backtrack!i)!j == SCORE = outputLCS backtrack (str1!!(i-1):curr1, str2!!(j-1):curr2) v (i-1) (j-1) | |
| otherwise = alignment | |
where | |
curr1 = fst alignment | |
curr2 = snd alignment | |
str1 = fst v | |
str2 = snd v | |
blosum62 :: (Char, Char) -> Int | |
blosum62 m = M.findWithDefault (-4) m $ M.fromList | |
[(('A','A'),4), (('A','B'),-2), (('A','C'),0), (('A','D'),-2), | |
(('A','E'),-1), (('A','F'),-2), (('A','G'),0), (('A','H'),-2), | |
(('A','I'),-1), (('A','K'),-1), (('A','L'),-1), (('A','M'),-1), | |
(('A','N'),-2), (('A','P'),-1), (('A','Q'),-1), (('A','R'),-1), | |
(('A','S'),1), (('A','T'),0), (('A','V'),0), (('A','W'),-3), | |
(('A','X'),-1), (('A','Y'),-2), (('A','Z'),-1), (('B','A'),-2), | |
(('B','B'),4), (('B','C'),-3), (('B','D'),4), (('B','E'),1), | |
(('B','F'),-3), (('B','G'),-1), (('B','H'),0), (('B','I'),-3), | |
(('B','K'),0), (('B','L'),-4), (('B','M'),-3), (('B','N'),3), | |
(('B','P'),-2), (('B','Q'),0), (('B','R'),-1), (('B','S'),0), | |
(('B','T'),-1), (('B','V'),-3), (('B','W'),-4), (('B','X'),-1), | |
(('B','Y'),-3), (('B','Z'),1), (('C','A'),0), (('C','B'),-3), | |
(('C','C'),9), (('C','D'),-3), (('C','E'),-4), (('C','F'),-2), | |
(('C','G'),-3), (('C','H'),-3), (('C','I'),-1), (('C','K'),-3), | |
(('C','L'),-1), (('C','M'),-1), (('C','N'),-3), (('C','P'),-3), | |
(('C','Q'),-3), (('C','R'),-3), (('C','S'),-1), (('C','T'),-1), | |
(('C','V'),-1), (('C','W'),-2), (('C','X'),-1), (('C','Y'),-2), | |
(('C','Z'),-3), (('D','A'),-2), (('D','B'),4), (('D','C'),-3), | |
(('D','D'),6), (('D','E'),2), (('D','F'),-3), (('D','G'),-1), | |
(('D','H'),-1), (('D','I'),-3), (('D','K'),-1), (('D','L'),-4), | |
(('D','M'),-3), (('D','N'),1), (('D','P'),-1), (('D','Q'),0), | |
(('D','R'),-2), (('D','S'),0), (('D','T'),-1), (('D','V'),-3), | |
(('D','W'),-4), (('D','X'),-1), (('D','Y'),-3), (('D','Z'),1), | |
(('E','A'),-1), (('E','B'),1), (('E','C'),-4), (('E','D'),2), | |
(('E','E'),5), (('E','F'),-3), (('E','G'),-2), (('E','H'),0), | |
(('E','I'),-3), (('E','K'),1), (('E','L'),-3), (('E','M'),-2), | |
(('E','N'),0), (('E','P'),-1), (('E','Q'),2), (('E','R'),0), | |
(('E','S'),0), (('E','T'),-1), (('E','V'),-2), (('E','W'),-3), | |
(('E','X'),-1), (('E','Y'),-2), (('E','Z'),4), (('F','A'),-2), | |
(('F','B'),-3), (('F','C'),-2), (('F','D'),-3), (('F','E'),-3), | |
(('F','F'),6), (('F','G'),-3), (('F','H'),-1), (('F','I'),0), | |
(('F','K'),-3), (('F','L'),0), (('F','M'),0), (('F','N'),-3), | |
(('F','P'),-4), (('F','Q'),-3), (('F','R'),-3), (('F','S'),-2), | |
(('F','T'),-2), (('F','V'),-1), (('F','W'),1), (('F','X'),-1), | |
(('F','Y'),3), (('F','Z'),-3), (('G','A'),0), (('G','B'),-1), | |
(('G','C'),-3), (('G','D'),-1), (('G','E'),-2), (('G','F'),-3), | |
(('G','G'),6), (('G','H'),-2), (('G','I'),-4), (('G','K'),-2), | |
(('G','L'),-4), (('G','M'),-3), (('G','N'),0), (('G','P'),-2), | |
(('G','Q'),-2), (('G','R'),-2), (('G','S'),0), (('G','T'),-2), | |
(('G','V'),-3), (('G','W'),-2), (('G','X'),-1), (('G','Y'),-3), | |
(('G','Z'),-2), (('H','A'),-2), (('H','B'),0), (('H','C'),-3), | |
(('H','D'),-1), (('H','E'),0), (('H','F'),-1), (('H','G'),-2), | |
(('H','H'),8), (('H','I'),-3), (('H','K'),-1), (('H','L'),-3), | |
(('H','M'),-2), (('H','N'),1), (('H','P'),-2), (('H','Q'),0), | |
(('H','R'),0), (('H','S'),-1), (('H','T'),-2), (('H','V'),-3), | |
(('H','W'),-2), (('H','X'),-1), (('H','Y'),2), (('H','Z'),0), | |
(('I','A'),-1), (('I','B'),-3), (('I','C'),-1), (('I','D'),-3), | |
(('I','E'),-3), (('I','F'),0), (('I','G'),-4), (('I','H'),-3), | |
(('I','I'),4), (('I','K'),-3), (('I','L'),2), (('I','M'),1), | |
(('I','N'),-3), (('I','P'),-3), (('I','Q'),-3), (('I','R'),-3), | |
(('I','S'),-2), (('I','T'),-1), (('I','V'),3), (('I','W'),-3), | |
(('I','X'),-1), (('I','Y'),-1), (('I','Z'),-3), (('K','A'),-1), | |
(('K','B'),0), (('K','C'),-3), (('K','D'),-1), (('K','E'),1), | |
(('K','F'),-3), (('K','G'),-2), (('K','H'),-1), (('K','I'),-3), | |
(('K','K'),5), (('K','L'),-2), (('K','M'),-1), (('K','N'),0), | |
(('K','P'),-1), (('K','Q'),1), (('K','R'),2), (('K','S'),0), | |
(('K','T'),-1), (('K','V'),-2), (('K','W'),-3), (('K','X'),-1), | |
(('K','Y'),-2), (('K','Z'),1), (('L','A'),-1), (('L','B'),-4), | |
(('L','C'),-1), (('L','D'),-4), (('L','E'),-3), (('L','F'),0), | |
(('L','G'),-4), (('L','H'),-3), (('L','I'),2), (('L','K'),-2), | |
(('L','L'),4), (('L','M'),2), (('L','N'),-3), (('L','P'),-3), | |
(('L','Q'),-2), (('L','R'),-2), (('L','S'),-2), (('L','T'),-1), | |
(('L','V'),1), (('L','W'),-2), (('L','X'),-1), (('L','Y'),-1), | |
(('L','Z'),-3), (('M','A'),-1), (('M','B'),-3), (('M','C'),-1), | |
(('M','D'),-3), (('M','E'),-2), (('M','F'),0), (('M','G'),-3), | |
(('M','H'),-2), (('M','I'),1), (('M','K'),-1), (('M','L'),2), | |
(('M','M'),5), (('M','N'),-2), (('M','P'),-2), (('M','Q'),0), | |
(('M','R'),-1), (('M','S'),-1), (('M','T'),-1), (('M','V'),1), | |
(('M','W'),-1), (('M','X'),-1), (('M','Y'),-1), (('M','Z'),-1), | |
(('N','A'),-2), (('N','B'),3), (('N','C'),-3), (('N','D'),1), | |
(('N','E'),0), (('N','F'),-3), (('N','G'),0), (('N','H'),1), | |
(('N','I'),-3), (('N','K'),0), (('N','L'),-3), (('N','M'),-2), | |
(('N','N'),6), (('N','P'),-2), (('N','Q'),0), (('N','R'),0), | |
(('N','S'),1), (('N','T'),0), (('N','V'),-3), (('N','W'),-4), | |
(('N','X'),-1), (('N','Y'),-2), (('N','Z'),0), (('P','A'),-1), | |
(('P','B'),-2), (('P','C'),-3), (('P','D'),-1), (('P','E'),-1), | |
(('P','F'),-4), (('P','G'),-2), (('P','H'),-2), (('P','I'),-3), | |
(('P','K'),-1), (('P','L'),-3), (('P','M'),-2), (('P','N'),-2), | |
(('P','P'),7), (('P','Q'),-1), (('P','R'),-2), (('P','S'),-1), | |
(('P','T'),-1), (('P','V'),-2), (('P','W'),-4), (('P','X'),-1), | |
(('P','Y'),-3), (('P','Z'),-1), (('Q','A'),-1), (('Q','B'),0), | |
(('Q','C'),-3), (('Q','D'),0), (('Q','E'),2), (('Q','F'),-3), | |
(('Q','G'),-2), (('Q','H'),0), (('Q','I'),-3), (('Q','K'),1), | |
(('Q','L'),-2), (('Q','M'),0), (('Q','N'),0), (('Q','P'),-1), | |
(('Q','Q'),5), (('Q','R'),1), (('Q','S'),0), (('Q','T'),-1), | |
(('Q','V'),-2), (('Q','W'),-2), (('Q','X'),-1), (('Q','Y'),-1), | |
(('Q','Z'),3), (('R','A'),-1), (('R','B'),-1), (('R','C'),-3), | |
(('R','D'),-2), (('R','E'),0), (('R','F'),-3), (('R','G'),-2), | |
(('R','H'),0), (('R','I'),-3), (('R','K'),2), (('R','L'),-2), | |
(('R','M'),-1), (('R','N'),0), (('R','P'),-2), (('R','Q'),1), | |
(('R','R'),5), (('R','S'),-1), (('R','T'),-1), (('R','V'),-3), | |
(('R','W'),-3), (('R','X'),-1), (('R','Y'),-2), (('R','Z'),0), | |
(('S','A'),1), (('S','B'),0), (('S','C'),-1), (('S','D'),0), | |
(('S','E'),0), (('S','F'),-2), (('S','G'),0), (('S','H'),-1), | |
(('S','I'),-2), (('S','K'),0), (('S','L'),-2), (('S','M'),-1), | |
(('S','N'),1), (('S','P'),-1), (('S','Q'),0), (('S','R'),-1), | |
(('S','S'),4), (('S','T'),1), (('S','V'),-2), (('S','W'),-3), | |
(('S','X'),-1), (('S','Y'),-2), (('S','Z'),0), (('T','A'),0), | |
(('T','B'),-1), (('T','C'),-1), (('T','D'),-1), (('T','E'),-1), | |
(('T','F'),-2), (('T','G'),-2), (('T','H'),-2), (('T','I'),-1), | |
(('T','K'),-1), (('T','L'),-1), (('T','M'),-1), (('T','N'),0), | |
(('T','P'),-1), (('T','Q'),-1), (('T','R'),-1), (('T','S'),1), | |
(('T','T'),5), (('T','V'),0), (('T','W'),-2), (('T','X'),-1), | |
(('T','Y'),-2), (('T','Z'),-1), (('V','A'),0), (('V','B'),-3), | |
(('V','C'),-1), (('V','D'),-3), (('V','E'),-2), (('V','F'),-1), | |
(('V','G'),-3), (('V','H'),-3), (('V','I'),3), (('V','K'),-2), | |
(('V','L'),1), (('V','M'),1), (('V','N'),-3), (('V','P'),-2), | |
(('V','Q'),-2), (('V','R'),-3), (('V','S'),-2), (('V','T'),0), | |
(('V','V'),4), (('V','W'),-3), (('V','X'),-1), (('V','Y'),-1), | |
(('V','Z'),-2), (('W','A'),-3), (('W','B'),-4), (('W','C'),-2), | |
(('W','D'),-4), (('W','E'),-3), (('W','F'),1), (('W','G'),-2), | |
(('W','H'),-2), (('W','I'),-3), (('W','K'),-3), (('W','L'),-2), | |
(('W','M'),-1), (('W','N'),-4), (('W','P'),-4), (('W','Q'),-2), | |
(('W','R'),-3), (('W','S'),-3), (('W','T'),-2), (('W','V'),-3), | |
(('W','W'),11), (('W','X'),-1), (('W','Y'),2), (('W','Z'),-3), | |
(('X','A'),-1), (('X','B'),-1), (('X','C'),-1), (('X','D'),-1), | |
(('X','E'),-1), (('X','F'),-1), (('X','G'),-1), (('X','H'),-1), | |
(('X','I'),-1), (('X','K'),-1), (('X','L'),-1), (('X','M'),-1), | |
(('X','N'),-1), (('X','P'),-1), (('X','Q'),-1), (('X','R'),-1), | |
(('X','S'),-1), (('X','T'),-1), (('X','V'),-1), (('X','W'),-1), | |
(('X','X'),-1), (('X','Y'),-1), (('X','Z'),-1), (('Y','A'),-2), | |
(('Y','B'),-3), (('Y','C'),-2), (('Y','D'),-3), (('Y','E'),-2), | |
(('Y','F'),3), (('Y','G'),-3), (('Y','H'),2), (('Y','I'),-1), | |
(('Y','K'),-2), (('Y','L'),-1), (('Y','M'),-1), (('Y','N'),-2), | |
(('Y','P'),-3), (('Y','Q'),-1), (('Y','R'),-2), (('Y','S'),-2), | |
(('Y','T'),-2), (('Y','V'),-1), (('Y','W'),2), (('Y','X'),-1), | |
(('Y','Y'),7), (('Y','Z'),-2), (('Z','A'),-1), (('Z','B'),1), | |
(('Z','C'),-3), (('Z','D'),1), (('Z','E'),4), (('Z','F'),-3), | |
(('Z','G'),-2), (('Z','H'),0), (('Z','I'),-3), (('Z','K'),1), | |
(('Z','L'),-3), (('Z','M'),-1), (('Z','N'),0), (('Z','P'),-1), | |
(('Z','Q'),3), (('Z','R'),0), (('Z','S'),0), (('Z','T'),-1), | |
(('Z','V'),-2), (('Z','W'),-3), (('Z','X'),-1), (('Z','Y'),-2), | |
(('Z','Z'),4)] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment