Skip to content

Instantly share code, notes, and snippets.

@Tarmean
Last active February 21, 2018 16:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Tarmean/f97a6463aaad8069416cc6810e8ba4e5 to your computer and use it in GitHub Desktop.
Save Tarmean/f97a6463aaad8069416cc6810e8ba4e5 to your computer and use it in GitHub Desktop.
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE BangPatterns #-}
module Lib (longestCommonSubstring) where
import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified Data.Text.Internal.Fusion.Common as S
import qualified Data.Text.Internal.Fusion as S
import qualified Data.List as L
import qualified Data.Text.Unsafe as U
import qualified Data.Text.Array as A
import Data.Word (Word64)
import Data.Text.Internal (Text(..))
import Data.Bits ((.|.), (.&.))
import Data.Text.Internal.Unsafe.Shift (shiftL)
{-# NoInline snoc' #-}
snoc' :: Text -> Char -> Text
snoc' t c = t `T.snoc` c
longestCommonSubstring :: [Text] -> Text
longestCommonSubstring entries = go [""] "" where
go :: [Text] -> Text -> Text
go [] longest = longest
go current@(s:_) _ = go (filter substringOfAll $ concatMap step current) s
substringOfAll :: Text -> Bool
substringOfAll s = checkAll entries
where
checkAll (x:xs) = s `isInfixOf` x && checkAll xs
checkAll [] = True
step :: Text -> [Text]
step s = map (s `snoc'`) xs
{-# NoInline xs #-}
xs = 'A' : 'C' : 'G' : 'T' : []
isInfixOf :: Text -> Text -> Bool
isInfixOf needle haystack
| T.null needle = True
| otherwise = not . L.null . indices needle $ haystack
{-# INLINE [1] isInfixOf #-}
data T = {-# UNPACK #-} !Word64 :* {-# UNPACK #-} !Int
indices :: Text -- ^ Substring to search for (@needle@)
-> Text -- ^ Text to search in (@haystack@)
-> [Int]
indices _needle@(Text narr noff nlen) _haystack@(Text harr hoff hlen)
| nlen <= 0 || ldiff < 0 = []
| otherwise = scan 0
where
ldiff = hlen - nlen
nlast = nlen - 1
z = nindex nlast
nindex k = A.unsafeIndex narr (noff+k)
hindex k = A.unsafeIndex harr (hoff+k)
hindex' k | k == hlen = 0
| otherwise = A.unsafeIndex harr (hoff+k)
buildTable !i !msk !skp
| i >= nlast = (msk .|. swizzle z) :* skp
| otherwise = buildTable (i+1) (msk .|. swizzle c) skp'
where c = nindex i
skp' | c == z = nlen - i - 2
| otherwise = skp
swizzle k = 1 `shiftL` (fromIntegral k .&. 0x3f)
scan !i
| i > ldiff = []
| c == z && candidateMatch 0 = i : scan (i + nlen)
| otherwise = scan (i + delta)
where c = hindex (i + nlast)
candidateMatch !j
| j >= nlast = True
| hindex (i+j) /= nindex j = False
| otherwise = candidateMatch (j+1)
delta | nextInPattern = nlen + 1
| c == z = skip + 1
| otherwise = 1
where nextInPattern = mask .&. swizzle (hindex' (i+nlen)) == 0
!(mask :* skip) = buildTable 0 0 (nlen-2)
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE BangPatterns #-}
module Lib (longestCommonSubstring) where
import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified Data.Text.Internal.Fusion.Common as S
import qualified Data.Text.Internal.Fusion as S
import qualified Data.List as L
import qualified Data.Text.Unsafe as U
import qualified Data.Text.Array as A
import Data.Word (Word64)
import Data.Text.Internal (Text(..))
import Data.Bits ((.|.), (.&.))
import Data.Text.Internal.Unsafe.Shift (shiftL)
{-# NoInline snoc' #-}
snoc' :: Text -> Char -> Text
snoc' t c = t `T.snoc` c
longestCommonSubstring :: [Text] -> Text
longestCommonSubstring entries = go [""] "" where
go :: [Text] -> Text -> Text
go [] longest = longest
go current@(s:_) _ = go (filter substringOfAll $ concatMap step current) s
substringOfAll :: Text -> Bool
substringOfAll s = checkAll entries
where
checkAll (x:xs) = s `isInfixOf` x && checkAll xs
checkAll [] = True
step :: Text -> [Text]
step s = map (s `snoc'`) xs
{-# NoInline xs #-}
xs = 'A' : 'C' : 'G' : 'T' : []
isInfixOf :: Text -> Text -> Bool
isInfixOf needle haystack
| T.null needle = True
| otherwise = not . L.null . indices needle $ haystack
{-# INLINE [1] isInfixOf #-}
data T = {-# UNPACK #-} !Word64 :* {-# UNPACK #-} !Int
indices :: Text -- ^ Substring to search for (@needle@)
-> Text -- ^ Text to search in (@haystack@)
-> [Int]
indices _needle@(Text narr noff nlen) _haystack@(Text harr hoff hlen)
| nlen <= 0 || ldiff < 0 = []
| otherwise = scan 0
where
ldiff = hlen - nlen
nlast = nlen - 1
z = nindex nlast
nindex k = A.unsafeIndex narr (noff+k)
hindex k = A.unsafeIndex harr (hoff+k)
hindex' k | k == hlen = 0
| otherwise = A.unsafeIndex harr (hoff+k)
buildTable !i !msk !skp
| i >= nlast = (msk .|. swizzle z) :* skp
| otherwise = buildTable (i+1) (msk .|. swizzle c) skp'
where c = nindex i
skp' | c == z = nlen - i - 2
| otherwise = skp
swizzle k = 1 `shiftL` (fromIntegral k .&. 0x3f)
scan !i
| i > ldiff = []
| c == z && candidateMatch 0 = i : scan (i + nlen)
| otherwise = scan (i + delta)
where c = hindex (i + nlast)
candidateMatch !j
| j >= nlast = True
| hindex (i+j) /= nindex j = False
| otherwise = candidateMatch (j+1)
delta | nextInPattern = nlen + 1
| c == z = skip + 1
| otherwise = 1
where nextInPattern = mask .&. swizzle (hindex' (i+nlen)) == 0
!(mask :* skip) = buildTable 0 0 (nlen-2)
{-# INLINE indices #-}
==================== Tidy Core ====================
2018-02-21 12:35:35.1648169 UTC
Result size of Tidy Core
= {terms: 440, types: 194, coercions: 0, joins: 6/17}
-- RHS size: {terms: 11, types: 6, coercions: 0, joins: 0/0}
$W:*
$W:*
= \ dt dt ->
case dt of { W64# dt -> case dt of { I# dt -> :* dt dt } }
-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
snoc'
snoc' = snoc
-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
$trModule4
$trModule4 = "Profile-0.1.0.0-95VECWVHYo03uC96ITYgBw"#
-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
$trModule3
$trModule3 = TrNameS $trModule4
-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
$trModule2
$trModule2 = "Lib"#
-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
$trModule1
$trModule1 = TrNameS $trModule2
-- RHS size: {terms: 3, types: 0, coercions: 0, joins: 0/0}
$trModule
$trModule = Module $trModule3 $trModule1
-- RHS size: {terms: 3, types: 1, coercions: 0, joins: 0/0}
$krep
$krep = KindRepTyConApp $tcInt []
-- RHS size: {terms: 3, types: 1, coercions: 0, joins: 0/0}
$krep1
$krep1 = KindRepTyConApp $tcWord64 []
-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
$tcT2
$tcT2 = "T"#
-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
$tcT1
$tcT1 = TrNameS $tcT2
-- RHS size: {terms: 7, types: 0, coercions: 0, joins: 0/0}
$tcT
$tcT
= TyCon
4973444345108523477##
438727976302148318##
$trModule
$tcT1
0#
krep$*
-- RHS size: {terms: 3, types: 1, coercions: 0, joins: 0/0}
$krep2
$krep2 = KindRepTyConApp $tcT []
-- RHS size: {terms: 3, types: 0, coercions: 0, joins: 0/0}
$krep3
$krep3 = KindRepFun $krep $krep2
-- RHS size: {terms: 3, types: 0, coercions: 0, joins: 0/0}
$tc':*1
$tc':*1 = KindRepFun $krep1 $krep3
-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
$tc':*3
$tc':*3 = "':*"#
-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
$tc':*2
$tc':*2 = TrNameS $tc':*3
-- RHS size: {terms: 7, types: 0, coercions: 0, joins: 0/0}
$tc':*
$tc':*
= TyCon
12941209089629544299##
11820972641960890866##
$trModule
$tc':*2
0#
$tc':*1
-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
xs
xs = C# 'A'#
-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
xs1
xs1 = C# 'C'#
-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
xs2
xs2 = C# 'G'#
-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
xs3
xs3 = C# 'T'#
-- RHS size: {terms: 3, types: 2, coercions: 0, joins: 0/0}
xs4
xs4 = : xs3 []
-- RHS size: {terms: 3, types: 1, coercions: 0, joins: 0/0}
xs5
xs5 = : xs2 xs4
-- RHS size: {terms: 3, types: 1, coercions: 0, joins: 0/0}
xs6
xs6 = : xs1 xs5
-- RHS size: {terms: 3, types: 1, coercions: 0, joins: 0/0}
xs7
xs7 = : xs xs6
-- RHS size: {terms: 3, types: 2, coercions: 0, joins: 0/0}
lvl
lvl = : empty_ []
-- RHS size: {terms: 333, types: 139, coercions: 0, joins: 6/17}
longestCommonSubstring
longestCommonSubstring
= \ entries ->
letrec {
go
go
= \ ds ->
case ds of {
[] -> [];
: y ys ->
let {
z
z = go ys } in
letrec {
go1
go1
= \ ds1 ->
case ds1 of {
[] -> z;
: y1 ys1 ->
let {
lvl1
lvl1 = go1 ys1 } in
let {
x
x = snoc' y y1 } in
join {
lvl2
lvl2 = : x lvl1 } in
joinrec {
checkAll
checkAll ds2
= case ds2 of {
[] -> jump lvl2;
: x1 xs8 ->
case x of { Text dt dt1 dt2 ->
case tagToEnum# (<=# dt2 0#) of {
False ->
case x1 of { Text dt4 dt5 dt6 ->
let {
ldiff
ldiff = -# dt6 dt2 } in
case tagToEnum# (<# ldiff 0#) of {
False ->
let {
nlast
nlast = -# dt2 1# } in
case indexWord16Array# dt (+# dt1 nlast) of r#
{ __DEFAULT ->
letrec {
$wbuildTable
$wbuildTable = \ ww ww1 ww2 ->
case tagToEnum# (>=# ww nlast) of {
False ->
case indexWord16Array# dt (+# dt1 ww)
of r#1
{ __DEFAULT ->
case tagToEnum# (eqWord# r#1 r#) of {
False ->
$wbuildTable
(+# ww 1#)
(or#
ww1
(uncheckedShiftL#
1##
(andI# (word2Int# r#1) 63#)))
ww2;
True ->
$wbuildTable
(+# ww 1#)
(or#
ww1
(uncheckedShiftL#
1##
(andI# (word2Int# r#1) 63#)))
(-# (-# dt2 ww) 2#)
}
};
True ->
(# or# ww1 (uncheckedShiftL# 1## (andI# (word2Int# r#) 63#)), ww2 #)
}; } in
letrec {
$wscan
$wscan
= \ ww ->
case $wbuildTable 0# 0## (-# dt2 2#) of
{ (# ww3, ww2 #) ->
case tagToEnum# (># ww ldiff) of {
False ->
case indexWord16Array#
dt4 (+# dt5 (+# ww nlast))
of r#1
{ __DEFAULT ->
join {
$j
$j
= let {
k
k = +# ww dt2 } in
case tagToEnum# (==# k dt6) of {
False ->
case indexWord16Array#
dt4 (+# dt5 k)
of r#2
{ __DEFAULT ->
case and#
ww3
(uncheckedShiftL#
1##
(andI#
(word2Int# r#2)
63#))
of {
__DEFAULT ->
case tagToEnum#
(eqWord# r#1 r#)
of {
False ->
$wscan (+# ww 1#);
True ->
$wscan
(+# ww (+# ww2 1#))
};
0## ->
$wscan (+# ww (+# dt2 1#))
}
};
True ->
case and# ww3 1## of {
__DEFAULT ->
case tagToEnum#
(eqWord# r#1 r#)
of {
False ->
$wscan (+# ww 1#);
True ->
$wscan
(+# ww (+# ww2 1#))
};
0## ->
$wscan (+# ww (+# dt2 1#))
}
} } in
case tagToEnum# (eqWord# r#1 r#) of {
False -> jump $j;
True ->
join {
lvl5
lvl5
= let {
i
i = I# ww } in
: i ($wscan (+# ww dt2)) } in
joinrec {
$wcandidateMatch
$wcandidateMatch ww5
= case tagToEnum#
(>=# ww5 nlast)
of {
False ->
case indexWord16Array#
dt4
(+#
dt5 (+# ww ww5))
of r#2
{ __DEFAULT ->
case indexWord16Array#
dt (+# dt1 ww5)
of r#3
{ __DEFAULT ->
case tagToEnum#
(neWord# r#2 r#3)
of {
False ->
jump $wcandidateMatch
(+# ww5 1#);
True -> jump $j
}
}
};
True -> jump lvl5
}; } in
jump $wcandidateMatch 0#
}
};
True -> []
}
}; } in
case $wscan 0# of {
[] -> lvl1;
: ds3 ds4 -> jump checkAll xs8
}
};
True -> lvl1
}
};
True -> jump checkAll xs8
}
}
}; } in
jump checkAll entries
}; } in
go1 xs7
}; } in
joinrec {
go1
go1 ds longest
= case ds of wild {
[] -> longest;
: s ds1 -> jump go1 (go wild) s
}; } in
jump go1 lvl empty_
==================== Tidy Core ====================
2018-02-21 12:35:03.5803256 UTC
Result size of Tidy Core
= {terms: 439, types: 183, coercions: 0, joins: 7/20}
-- RHS size: {terms: 11, types: 6, coercions: 0, joins: 0/0}
$W:*
$W:*
= \ dt dt ->
case dt of { W64# dt -> case dt of { I# dt -> :* dt dt } }
-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
snoc'
snoc' = snoc
-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
$trModule4
$trModule4 = "Profile-0.1.0.0-95VECWVHYo03uC96ITYgBw"#
-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
$trModule3
$trModule3 = TrNameS $trModule4
-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
$trModule2
$trModule2 = "Lib"#
-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
$trModule1
$trModule1 = TrNameS $trModule2
-- RHS size: {terms: 3, types: 0, coercions: 0, joins: 0/0}
$trModule
$trModule = Module $trModule3 $trModule1
-- RHS size: {terms: 3, types: 1, coercions: 0, joins: 0/0}
$krep
$krep = KindRepTyConApp $tcInt []
-- RHS size: {terms: 3, types: 1, coercions: 0, joins: 0/0}
$krep1
$krep1 = KindRepTyConApp $tcWord64 []
-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
$tcT2
$tcT2 = "T"#
-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
$tcT1
$tcT1 = TrNameS $tcT2
-- RHS size: {terms: 7, types: 0, coercions: 0, joins: 0/0}
$tcT
$tcT
= TyCon
4973444345108523477##
438727976302148318##
$trModule
$tcT1
0#
krep$*
-- RHS size: {terms: 3, types: 1, coercions: 0, joins: 0/0}
$krep2
$krep2 = KindRepTyConApp $tcT []
-- RHS size: {terms: 3, types: 0, coercions: 0, joins: 0/0}
$krep3
$krep3 = KindRepFun $krep $krep2
-- RHS size: {terms: 3, types: 0, coercions: 0, joins: 0/0}
$tc':*1
$tc':*1 = KindRepFun $krep1 $krep3
-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
$tc':*3
$tc':*3 = "':*"#
-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
$tc':*2
$tc':*2 = TrNameS $tc':*3
-- RHS size: {terms: 7, types: 0, coercions: 0, joins: 0/0}
$tc':*
$tc':*
= TyCon
12941209089629544299##
11820972641960890866##
$trModule
$tc':*2
0#
$tc':*1
-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
xs
xs = C# 'A'#
-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
xs1
xs1 = C# 'C'#
-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
xs2
xs2 = C# 'G'#
-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
xs3
xs3 = C# 'T'#
-- RHS size: {terms: 3, types: 2, coercions: 0, joins: 0/0}
xs4
xs4 = : xs3 []
-- RHS size: {terms: 3, types: 1, coercions: 0, joins: 0/0}
xs5
xs5 = : xs2 xs4
-- RHS size: {terms: 3, types: 1, coercions: 0, joins: 0/0}
xs6
xs6 = : xs1 xs5
-- RHS size: {terms: 3, types: 1, coercions: 0, joins: 0/0}
xs7
xs7 = : xs xs6
-- RHS size: {terms: 3, types: 2, coercions: 0, joins: 0/0}
lvl
lvl = : empty_ []
-- RHS size: {terms: 332, types: 128, coercions: 0, joins: 7/20}
longestCommonSubstring
longestCommonSubstring
= \ entries ->
letrec {
go
go
= \ ds ->
case ds of {
[] -> [];
: y ys ->
let {
z
z = go ys } in
letrec {
go1
go1
= \ ds1 ->
case ds1 of {
[] -> z;
: y1 ys1 ->
let {
lvl1
lvl1 = go1 ys1 } in
let {
x
x = snoc' y y1 } in
join {
lvl2
lvl2 = : x lvl1 } in
joinrec {
checkAll
checkAll ds2
= case ds2 of {
[] -> jump lvl2;
: x1 xs8 ->
case x of { Text dt dt1 dt2 ->
case tagToEnum# (<=# dt2 0#) of {
False ->
case x1 of { Text dt4 dt5 dt6 ->
let {
ldiff
ldiff = -# dt6 dt2 } in
case tagToEnum# (<# ldiff 0#) of {
False ->
let {
nlast
nlast = -# dt2 1# } in
case indexWord16Array# dt (+# dt1 nlast) of r#
{ __DEFAULT ->
let {
lvl3
lvl3
= uncheckedShiftL#
1## (andI# (word2Int# r#) 63#) } in
let {
lvl4
lvl4 = +# dt2 1# } in
joinrec {
$wbuildTable
$wbuildTable = \ ww ww1 ww2 ->
case tagToEnum# (>=# ww nlast) of {
False ->
case indexWord16Array# dt (+# dt1 ww)
of r#1
{ __DEFAULT ->
case tagToEnum# (eqWord# r#1 r#) of {
False ->
jump $wbuildTable
(+# ww 1#)
(or#
ww1
(uncheckedShiftL#
1##
(andI# (word2Int# r#1) 63#)))
ww2;
True ->
jump $wbuildTable
(+# ww 1#)
(or#
ww1
(uncheckedShiftL#
1##
(andI# (word2Int# r#1) 63#)))
(-# (-# dt2 ww) 2#)
}
};
True ->
let {
ww3
ww3 = or# ww1 lvl3 } in
letrec {
$wscan
$wscan
= \ ww ->
case tagToEnum# (># ww ldiff) of {
False ->
case indexWord16Array#
dt4 (+# dt5 (+# ww nlast))
of r#1
{ __DEFAULT ->
join {
$j
$j
= let {
k
k = +# ww dt2 } in
case tagToEnum# (==# k dt6) of {
False ->
case indexWord16Array#
dt4 (+# dt5 k)
of r#2
{ __DEFAULT ->
case and#
ww3
(uncheckedShiftL#
1##
(andI#
(word2Int# r#2)
63#))
of {
__DEFAULT ->
case tagToEnum#
(eqWord# r#1 r#)
of {
False ->
$wscan (+# ww 1#);
True ->
$wscan
(+# ww (+# ww2 1#))
};
0## ->
$wscan (+# ww lvl4)
}
};
True ->
case and# ww3 1## of {
__DEFAULT ->
case tagToEnum#
(eqWord# r#1 r#)
of {
False ->
$wscan (+# ww 1#);
True ->
$wscan
(+# ww (+# ww2 1#))
};
0## ->
$wscan (+# ww lvl4)
}
} } in
case tagToEnum# (eqWord# r#1 r#) of {
False -> jump $j;
True ->
join {
lvl5
lvl5
= let {
i
i = I# ww } in
: i ($wscan (+# ww dt2)) } in
joinrec {
$wcandidateMatch
$wcandidateMatch ww5
= case tagToEnum#
(>=# ww5 nlast)
of {
False ->
case indexWord16Array#
dt4
(+#
dt5 (+# ww ww5))
of r#2
{ __DEFAULT ->
case indexWord16Array#
dt (+# dt1 ww5)
of r#3
{ __DEFAULT ->
case tagToEnum#
(neWord# r#2 r#3)
of {
False ->
jump $wcandidateMatch
(+# ww5 1#);
True -> jump $j
}
}
};
True -> jump lvl5
}; } in
jump $wcandidateMatch 0#
}
};
True -> []
}; } in
case $wscan 0# of {
[] -> lvl1;
: ds3 ds4 -> jump checkAll xs8
}
}; } in jump $wbuildTable 0# 0## (-# dt2 2#)
};
True -> lvl1
}
};
True -> jump checkAll xs8
}
}
}; } in
jump checkAll entries
}; } in
go1 xs7
}; } in
joinrec {
go1
go1 ds longest
= case ds of wild {
[] -> longest;
: s ds1 -> jump go1 (go wild) s
}; } in
jump go1 lvl empty_
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment