public
Last active

  • Download Gist
julesjacobs.hs
Haskell
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
-- n: the number of identical characters a before index i
-- k: the number of at most two different characters a and b before i
step :: (Eq a, Num t) => a -> (t, t, a, a) -> (t, t, a, a)
step x (n,k,a,b)
| x==a = (n+1,k+1,x,b)
| x==b = (1,k+1,x,a)
| otherwise = (1,n+1,x,a)
 
useStepBytestring :: B.ByteString -> B.ByteString
useStepBytestring bs
| B.length bs <= 2 = bs
| otherwise = extractString bs $ B.foldl' f ((if u == v then 2 else 1,2,v,u),((0,2),2)) vs
where (u,v,vs) = (B.index bs 0, B.index bs 1, B.drop 2 bs)
f ((!n,!k,!a,!b),(!o,!i)) x = (s2,(update o,i+1))
where s2@(n',k',a',b') = step x (n,k,a,b)
update (o1,l1) = if k' > l1
then (i-k'+1,k')
else (o1,l1)
 
 
extractString as (_,((off,len),_)) = B.take len (B.drop off as)
simple.hs
Haskell
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
{-# LANGUAGE OverloadedStrings #-}
module Main where
 
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
import Data.List(maximumBy)
import Data.Function(on)
import qualified Data.Set as S
import Data.Word8(Word8)
import Test.QuickCheck
import Test.HUnit
 
 
findLongestSequence :: B.ByteString -> B.ByteString
findLongestSequence = findLongestSequence_ findSequence
 
findLongestSequence2 :: B.ByteString -> B.ByteString
findLongestSequence2 = findLongestSequence_ findSequenceCustom
 
findLongestSequence_ :: (BC.ByteString -> Int) -> B.ByteString -> B.ByteString
findLongestSequence_ findSeq s = B.take len $ B.drop (B.length s - remainingChars) s
where (remainingChars,len) = maximumBy (compare `on` snd) [(B.length ss, findSeq ss) | ss <- B.tails s, B.length ss > 0]
 
-- using a set
findSequence :: BC.ByteString -> Int
findSequence ss = count S.empty ss 0
where count m s res
| B.null s || S.size m' > 2 = res
| otherwise = count m' (B.tail s) (res+1)
where m' = S.insert (B.head s) m
 
-- using custom data type (only works for 2 elements)
data TwoQueue = Empty
| OneIn {-# UNPACK #-} !Word8
| TwoIn {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8
 
-- using custom data type (only works for 2 elements)
findSequenceCustom :: BC.ByteString -> Int
findSequenceCustom ss = count Empty (ss, B.length ss) 0
where count _ (_,0) res = res
count Empty (s,n) res = count (OneIn $! B.head s) (B.tail s,n-1) (res+1)
count (OneIn a) (s,n) res
| B.head s == a = count (OneIn a) (B.tail s,n-1) (res+1)
| otherwise = count (TwoIn a (B.head s))(B.tail s,n-1) (res+1)
count (TwoIn a b) (s,n) res
| B.head s == a || B.head s == b = count (TwoIn a b) (B.tail s,n-1) (res+1)
| otherwise = res
 
tests = sequence_ [findSequence "aabbaac" @?= 6
,findSequence "caabb" @?= 3
,findLongestSequence "caabb" @?= "aabb"
,findLongestSequence "caabbaaa" @?= "aabbaaa"
,findLongestSequence "ccab" @?= "cca"]
 
newtype SeqString = SeqString BC.ByteString deriving (Eq,Show)
instance Arbitrary SeqString where
arbitrary = (SeqString . BC.pack) `fmap` listOf (elements "abcd")
prop_findModel (SeqString s) = findSequence s == findSequenceCustom s
filterSpaces = B.filter (not . isSpace)
isSpace c = c <= 0x20 -- just eliminate special characters
main = B.getContents >>= print . findLongestSequence2 . filterSpaces

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.