Created
July 20, 2022 12:59
-
-
Save Lev135/79ef510aa58dda486dc2a1bd925ac3be to your computer and use it in GitHub Desktop.
Check if every list composed from the set of patterns can be uniquely decomposed into patterns using Sardinas-Peterson's algorithm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module LookAhead | |
( Pattern (..), | |
ConflictPatterns, | |
checkUniquePatSplit, | |
checkUniquePatSplit', | |
) | |
where | |
import Control.Monad (guard, when) | |
import Data.Bifunctor (Bifunctor (..)) | |
import Data.Maybe (catMaybes, maybeToList) | |
import Data.Set (Set) | |
import qualified Data.Set as S | |
-- | Patterns --- parts of lists of `c` | |
-- `k` --- pattern's name type | |
data Pattern k c = Pattern | |
{ -- | unique name of pattern | |
name :: k, | |
-- | list of sets of acceptable elements on each position | |
body :: [Set c], | |
-- | list of NOT acceptable elements before/after pattern | |
behind, ahead :: [Set c] | |
} | |
deriving (Eq, Ord, Show) | |
data RPattern k c = RPattern | |
{ name :: k, | |
body :: [Set c], | |
rbehind, ahead :: [Set c] | |
} | |
deriving (Eq, Ord, Show) | |
data Suff c = Suff | |
{ rbehind :: [Set c], | |
body :: [Set c], | |
ahead :: [Set c] | |
} | |
deriving (Eq, Ord, Show) | |
data PatDiv k c = PatDiv | |
{ rprefPatNames, rpatNames :: [k], | |
suff :: Suff c | |
} | |
deriving (Eq, Ord, Show) | |
zipWithKeepRest :: (a -> a -> a) -> [a] -> [a] -> [a] | |
zipWithKeepRest _ [] ys = ys | |
zipWithKeepRest _ xs [] = xs | |
zipWithKeepRest g (x : xs) (y : ys) = g x y : zipWithKeepRest g xs ys | |
initDiv :: RPattern k cs -> PatDiv k cs | |
initDiv RPattern {name, body, ahead} = | |
PatDiv | |
{ rprefPatNames = [], | |
rpatNames = [name], | |
suff = | |
Suff | |
{ rbehind = [], | |
body, | |
ahead | |
} | |
} | |
{- | |
behind big body big ahead | |
-------|=========================|~~~~~~~~~~~~~~ | |
-------|============|~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
behind small body small ahead | |
--------------------|============|~~~~~~~~~~~~~ | |
res behind res body res ahead | |
-} | |
divStep :: Ord c => Int -> PatDiv k c -> RPattern k c -> Maybe (PatDiv k c) | |
divStep | |
maxBehind | |
PatDiv {rprefPatNames, rpatNames, suff = Suff {rbehind, body, ahead}} | |
RPattern {name, body = pbody, rbehind = prbehind, ahead = pahead} = | |
do | |
let rbehind' = diffPref prbehind rbehind | |
guard $ not $ any null rbehind' | |
let drbehind = zipWith S.intersection pbody body | |
guard $ not $ any null drbehind | |
let (bigBody, smallBody, bigAhead, smallAhead) = | |
if length pbody <= length body | |
then (body, pbody, ahead, pahead) | |
else (pbody, body, pahead, ahead) | |
let body' = diffPref smallAhead $ drop (length smallBody) bigBody | |
guard $ not $ any null body' | |
let ahead' = | |
zipWithKeepRest | |
S.union | |
bigAhead | |
(drop (length bigBody - length smallBody) smallAhead) | |
let (rprefPatNames', rpatNames') = | |
if length pbody <= length body | |
then (name : rprefPatNames, rpatNames) | |
else (rprefPatNames, name : rpatNames) | |
return | |
PatDiv | |
{ rprefPatNames = rprefPatNames', | |
rpatNames = rpatNames', | |
suff = | |
Suff | |
{ rbehind = take maxBehind $ reverse drbehind <> rbehind', | |
body = body', | |
ahead = ahead' | |
} | |
} | |
where | |
diffPref [] ys = ys | |
diffPref _ [] = error "Long prefix" | |
diffPref (x : xs) (y : ys) = S.difference y x : diffPref xs ys | |
-- | Two sequences of patterns, that lead to the same list | |
type ConflictPatterns k = ([k], [k]) | |
-- | Check if every list composed from the set of patterns can be uniquely decomposed into patterns | |
checkUniquePatSplit :: forall k c. (Eq k, Ord c) => [Pattern k c] -> Either (ConflictPatterns k) () | |
checkUniquePatSplit pats = | |
first (bimap reverse reverse) $ | |
mapM_ (h S.empty) [res | p <- rpats, p' <- rpats, p /= p', res <- maybeToList $ divStep maxBehind (initDiv p') p] | |
where | |
rpats = | |
( \Pattern {name, body, behind, ahead} -> | |
RPattern {name, body, rbehind = reverse behind, ahead} | |
) | |
<$> pats | |
maxBehind = maximum $ (\RPattern {rbehind} -> length rbehind) <$> rpats | |
h :: Set (Suff c) -> PatDiv k c -> Either (ConflictPatterns k) () | |
h olds pdiv@PatDiv {rprefPatNames, rpatNames, suff = suff@Suff {body}} = do | |
when (null body) $ | |
Left (rprefPatNames, rpatNames) | |
mapM_ (h (S.insert suff olds)) $ | |
filter ((`S.notMember` olds) . (\PatDiv {suff} -> suff)) $ | |
catMaybes $ divStep maxBehind pdiv <$> rpats | |
checkUniquePatSplit' :: Ord c => [[c]] -> Either (ConflictPatterns [c]) () | |
checkUniquePatSplit' pats = | |
checkUniquePatSplit | |
( map | |
( \x -> Pattern {name = x, body = S.singleton <$> x, behind = [], ahead = []} | |
) | |
pats | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment