Skip to content

Instantly share code, notes, and snippets.

@marcmo
Last active December 16, 2015 02:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save marcmo/5363788 to your computer and use it in GitHub Desktop.
Save marcmo/5363788 to your computer and use it in GitHub Desktop.
-- n: the number of identical characters a before index i
-- k: the number of at most two different characters a and b before i
step :: (Eq a, Num t) => a -> (t, t, a, a) -> (t, t, a, a)
step x (n,k,a,b)
| x==a = (n+1,k+1,x,b)
| x==b = (1,k+1,x,a)
| otherwise = (1,n+1,x,a)
useStepBytestring :: B.ByteString -> B.ByteString
useStepBytestring bs
| B.length bs <= 2 = bs
| otherwise = extractString bs $ B.foldl' f ((if u == v then 2 else 1,2,v,u),((0,2),2)) vs
where (u,v,vs) = (B.index bs 0, B.index bs 1, B.drop 2 bs)
f ((!n,!k,!a,!b),(!o,!i)) x = (s2,(update o,i+1))
where s2@(n',k',a',b') = step x (n,k,a,b)
update (o1,l1) = if k' > l1
then (i-k'+1,k')
else (o1,l1)
extractString as (_,((off,len),_)) = B.take len (B.drop off as)
{-# LANGUAGE OverloadedStrings #-}
module Main where
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
import Data.List(maximumBy)
import Data.Function(on)
import qualified Data.Set as S
import Data.Word8(Word8)
import Test.QuickCheck
import Test.HUnit
findLongestSequence :: B.ByteString -> B.ByteString
findLongestSequence = findLongestSequence_ findSequence
findLongestSequence2 :: B.ByteString -> B.ByteString
findLongestSequence2 = findLongestSequence_ findSequenceCustom
findLongestSequence_ :: (BC.ByteString -> Int) -> B.ByteString -> B.ByteString
findLongestSequence_ findSeq s = B.take len $ B.drop (B.length s - remainingChars) s
where (remainingChars,len) = maximumBy (compare `on` snd) [(B.length ss, findSeq ss) | ss <- B.tails s, B.length ss > 0]
-- using a set
findSequence :: BC.ByteString -> Int
findSequence ss = count S.empty ss 0
where count m s res
| B.null s || S.size m' > 2 = res
| otherwise = count m' (B.tail s) (res+1)
where m' = S.insert (B.head s) m
-- using custom data type (only works for 2 elements)
data TwoQueue = Empty
| OneIn {-# UNPACK #-} !Word8
| TwoIn {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8
-- using custom data type (only works for 2 elements)
findSequenceCustom :: BC.ByteString -> Int
findSequenceCustom ss = count Empty (ss, B.length ss) 0
where count _ (_,0) res = res
count Empty (s,n) res = count (OneIn $! B.head s) (B.tail s,n-1) (res+1)
count (OneIn a) (s,n) res
| B.head s == a = count (OneIn a) (B.tail s,n-1) (res+1)
| otherwise = count (TwoIn a (B.head s))(B.tail s,n-1) (res+1)
count (TwoIn a b) (s,n) res
| B.head s == a || B.head s == b = count (TwoIn a b) (B.tail s,n-1) (res+1)
| otherwise = res
tests = sequence_ [findSequence "aabbaac" @?= 6
,findSequence "caabb" @?= 3
,findLongestSequence "caabb" @?= "aabb"
,findLongestSequence "caabbaaa" @?= "aabbaaa"
,findLongestSequence "ccab" @?= "cca"]
newtype SeqString = SeqString BC.ByteString deriving (Eq,Show)
instance Arbitrary SeqString where
arbitrary = (SeqString . BC.pack) `fmap` listOf (elements "abcd")
prop_findModel (SeqString s) = findSequence s == findSequenceCustom s
filterSpaces = B.filter (not . isSpace)
isSpace c = c <= 0x20 -- just eliminate special characters
main = B.getContents >>= print . findLongestSequence2 . filterSpaces
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment