Skip to content

Instantly share code, notes, and snippets.

/Diff.hs Secret

Created June 6, 2013 02:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/91295c6236d14d7b4446 to your computer and use it in GitHub Desktop.
Save anonymous/91295c6236d14d7b4446 to your computer and use it in GitHub Desktop.
Compile: ghc Miller.hs -O2 -fllvm -prof -fprof-auto Run: ./Miller +RTS -s -p -hy
{-# LANGUAGE BangPatterns #-}
-- |
-- Implementation of "An O(NP) Sequence Comparison Algorithm" by Sun Wu, Udi
-- Manber, Gene Myers and Web Miller
-- Note: P=(D-(M-N))/2 where D is shortest edit distance, M,N sequence lengths,
-- M >= N
--
import Data.Vector.Unboxed.Mutable as MU
import Data.Vector.Unboxed as U hiding (mapM_)
import Control.Monad.ST as ST
import Control.Monad.Primitive (PrimState)
import Control.Monad (when)
import Data.STRef (newSTRef, modifySTRef, readSTRef)
import Data.Int
type MVI1 s = MVector (PrimState (ST s)) Int
cmp :: U.Vector Int32 -> U.Vector Int32 -> Int -> Int -> Int
cmp a b i j = go 0 i j
where
n = U.length a
m = U.length b
go !len !i !j| (i<n) && (j<m) && ((unsafeIndex a i) == (unsafeIndex b j)) = go (len+1) (i+1) (j+1)
| otherwise = len
{-#INLINE cmp #-}
newVI1 :: Int -> Int -> ST s (MVI1 s)
newVI1 n x = do
a <- new n
mapM_ (\i -> MU.unsafeWrite a i x) [0..n-1]
return a
-- function to find previous y on diagonal k for furthest point
findYP :: MVI1 s -> Int -> Int -> ST s (Int,Int)
findYP fp k offset = do
let k0 = k+offset-1
k1 = k+offset+1
y0 <- MU.unsafeRead fp k0 >>= \x -> return $ 1+x
y1 <- MU.unsafeRead fp k1
if y0 > y1 then return (k0,y0)
else return (k1,y1)
{-#INLINE findYP #-}
gridWalk :: Vector Int32 -> Vector Int32 -> MVI1 s -> Int -> (Vector Int32 -> Vector Int32 -> Int -> Int -> Int) -> ST s ()
gridWalk a b fp !k cmp = {-#SCC gridWalk #-} do
let !offset = 1+U.length a
(!kp,!yp) <- {-#SCC findYP #-} findYP fp k offset
let xp = yp-k
len = {-#SCC cmp #-} cmp a b xp yp
x = xp+len
y = yp+len
{-#SCC "updateFP" #-} MU.unsafeWrite fp (k+offset) y
return ()
{-#INLINE gridWalk #-}
-- As noted in the paper, we find furthest point given diagonal k, and current set of furthest points.
-- The function below executes ct times, and updates furthest point (fp), snake node indices in snake
-- array (snodes), and inserts new snakes in snake array as they are found during furthest point search
findSnakes :: Vector Int32 -> Vector Int32 -> MVI1 s -> Int -> Int -> (Vector Int32 -> Vector Int32 -> Int -> Int -> Int) -> (Int -> Int -> Int) -> ST s ()
findSnakes a b fp !k !ct cmp op = {-#SCC findSnakes #-} U.forM_ (U.fromList [0..ct-1]) (\x -> gridWalk a b fp (op k x) cmp)
{-#INLINE findSnakes #-}
-- while tail-recursive loop courtesy of Gabriel Gonzalez
-- see http://www.haskellforall.com/2012/01/haskell-for-c-programmers-for-loops.html
while :: (Monad m) => m Bool -> m a -> m ()
while !cond !action = do
c <- cond
when c $ do
action
while cond action
-- Helper function for lcs
lcsh :: Vector Int32 -> Vector Int32 -> Bool -> Int
lcsh a b flip = runST $ do
let n = U.length a
m = U.length b
delta = m-n
offset=n+1
deloff=m+1 -- array index location of diagonal at delta
fp <- newVI1 (m+n+3) (-1) -- array of furthest points
p <- newSTRef 0 -- minimum number of deletions for a
while (MU.unsafeRead fp (deloff) >>= \x -> return $! (x<m))
$ do
n <- readSTRef p
findSnakes a b fp (-n) (delta+n) cmp (+) -- vertical traversal
findSnakes a b fp (delta+n) n cmp (-) -- horizontal traversal
findSnakes a b fp delta 1 cmp (-) -- diagonal traversal
modifySTRef p (+1) -- increment p by 1
-- length of LCS is n-p. p must be decremented by 1 first for correct value of p
readSTRef p >>= \x -> return $ (U.length a)-(x-1)
{-#INLINABLE lcsh #-}
-- Function to find longest common subsequence given unboxed vectors a and b
-- It returns indices of LCS in a and b
lcs :: Vector Int32 -> Vector Int32 -> Int
lcs a b | (U.length a > U.length b) = lcsh b a True
| otherwise = lcsh a b False
{-#INLINABLE lcs #-}
main = print $ lcs (U.fromList [0..3999]) (U.fromList [3999..7999])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment