Created
November 4, 2020 16:56
-
-
Save joehendrix/40aa23d31df474a86dcc922f05d92835 to your computer and use it in GitHub Desktop.
"DFA" based cover
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE OverloadedStrings #-} | |
import Control.Monad | |
import Data.Foldable | |
import Data.IntMap (IntMap) | |
import qualified Data.IntMap as IMap | |
import Data.Map (Map) | |
import qualified Data.Map as Map | |
import Data.Maybe | |
import Data.Set (Set) | |
import qualified Data.Set as Set | |
import Data.String | |
-- | Maps indexing in string to character at that index. | |
type CharMap = IntMap Char | |
mkCharMap :: String -> CharMap | |
mkCharMap s = snd (foldl (\(n,m) ch -> (n+1, IMap.insert n ch m)) (0, IMap.empty) s) | |
type CharIndex = Int | |
-- | A DFA state is a set of character indices in string (or one more for end of string) | |
type DFAState = Set CharIndex | |
-- | Add a state to the dfa state, but close it. | |
closedState' :: CharMap -> DFAState -> CharIndex -> DFAState | |
closedState' m s st | |
| Set.member st s = s | |
| IMap.findWithDefault 'x' st m == '*' = | |
closedState' m (Set.insert st s) (st+1) | |
| otherwise = Set.insert st s | |
closedState :: CharMap -> CharIndex -> DFAState | |
closedState m = closedState' m Set.empty | |
-- | Edge summary describes all out edges of a DFAState | |
data EdgeSummary = ES { esCharMap :: !(Map Char DFAState) | |
, esDefault :: !DFAState | |
-- ^ States to reach on character that is not | |
-- in domain of char map | |
} | |
deriving (Show) | |
-- Make an edge summary a monoid so we can merge edges from different states. | |
instance Semigroup EdgeSummary where | |
x <> y = | |
let join = \_ xs ys -> Just (xs <> ys) | |
insLeft = fmap (mappend (esDefault y)) | |
insRight = fmap (mappend (esDefault x)) | |
in ES { esCharMap = Map.mergeWithKey join insLeft insRight (esCharMap x) (esCharMap y) | |
, esDefault = Set.union (esDefault x) (esDefault y) | |
} | |
instance Monoid EdgeSummary where | |
mempty = ES { esCharMap = Map.empty, esDefault = Set.empty } | |
mkStateEdge :: CharMap -> CharIndex -> EdgeSummary | |
mkStateEdge m s = | |
case IMap.lookup s m of | |
Just '*' -> mempty { esDefault = closedState m s } | |
Just '?' -> mempty { esDefault = closedState m (s + 1) } | |
Just c -> mempty { esCharMap = Map.singleton c (closedState m (s+1)) } | |
Nothing -> mempty | |
-- | Make an edge summary for a DFAState | |
mkEdgeSummary :: CharMap -> DFAState -> EdgeSummary | |
mkEdgeSummary m s = foldMap (mkStateEdge m) s | |
-- | Helper function to create DFA edge summaries | |
toDFA' :: CharMap | |
-> Map DFAState EdgeSummary | |
-> Set DFAState | |
-> Map DFAState EdgeSummary | |
toDFA' m edges r | |
| Set.null r = edges | |
| otherwise = | |
let (s, r') = Set.deleteFindMin r | |
es = mkEdgeSummary m s | |
-- Insert new starte into state to explore | |
insState :: Set DFAState -> DFAState -> Set DFAState | |
insState p s | Map.member s edges = p | |
| otherwise = Set.insert s p | |
-- Get next states to explore | |
next = foldl' insState (insState r' (esDefault es)) (esCharMap es) | |
in toDFA' m (Map.insert s es edges) next | |
-- | A DFA tuned to the string matching problem. | |
data DFA = DFA { dfaInit :: !DFAState | |
, dfaTrans :: !(Map DFAState EdgeSummary) | |
, dfaAcceptIndex :: !CharIndex -- ^ Index of accepting edge | |
} | |
deriving (Show) | |
-- | Construct a DFA from a string | |
toDFA :: String -> DFA | |
toDFA s = | |
let cm = mkCharMap s | |
i = closedState cm 0 | |
m = toDFA' cm Map.empty (Set.singleton i) | |
in DFA i m (IMap.size cm) | |
-- | Return true if DFAState is an accepting state. | |
accept :: DFA -> DFAState -> Bool | |
accept m s = Set.member (dfaAcceptIndex m) s | |
type StatePair = (DFAState, DFAState) | |
-- | Merge two edge summaries to get map from known character to state pair. | |
mergeEdgeSummary :: EdgeSummary -> EdgeSummary -> Map Char StatePair | |
mergeEdgeSummary x y = | |
let join = \_ xs ys -> Just (xs, ys) | |
insLeft = fmap (\xs -> (xs, esDefault y)) | |
insRight = fmap (\ys -> (esDefault x, ys)) | |
in Map.mergeWithKey join insLeft insRight (esCharMap x) (esCharMap y) | |
-- | Set of characters in alphabet (use emptyset for any character). | |
newtype Alphabet = Alphabet (Set Char) | |
instance IsString Alphabet where | |
fromString s = Alphabet (Set.fromList s) | |
-- | Find character in alphabet that is not bound in map. | |
findMissing :: Alphabet -> Map Char s -> Maybe Char | |
findMissing (Alphabet sig) m | |
| Set.null sig = Just '?' -- Treat null as infinite | |
| Set.null s = Nothing | |
| otherwise = Just (Set.findMin s) | |
where s = Set.difference sig (Map.keysSet m) | |
-- | @cover' sig x y seen r@ looks for a string starting from a state in | |
-- @r@ that is accepted by @x@ and not by @y@. | |
cover' :: Alphabet -- ^ Alphabet | |
-> DFA | |
-> DFA | |
-> Set StatePair | |
-> [(String,StatePair)] | |
-- ^ Unexplored state pairs and string witnessing state pair is reachable. | |
-> Maybe String | |
cover' _ _x _y _s [] = Nothing | |
cover' sig x y seen ((wit,(xs,ys)):r) | |
| not (accept x xs), accept y ys = Just (reverse wit) | |
| otherwise = | |
let xe = Map.findWithDefault mempty xs (dfaTrans x) | |
ye = Map.findWithDefault mempty ys (dfaTrans y) | |
cm = mergeEdgeSummary xe ye | |
-- Insert 'c' and `p` is a string. | |
insSubsume (s, l) c p | |
| Set.member p s = (s,l) | |
| otherwise = (Set.insert p s, (c : wit, p) : l) | |
cmr = Map.foldlWithKey insSubsume (seen, r) cm | |
(seen', r') = | |
case findMissing sig cm of | |
Nothing -> cmr | |
Just d -> insSubsume cmr d (esDefault xe, esDefault ye) | |
in cover' sig x y seen' r' | |
-- | `cover0 sig x y` returns Nothing if every string matched by `y` is matched by `x` | |
-- and otherwise returns a witness that is matched by `y` and rejected by `x`. | |
-- | |
-- The characters in sig are the complete alphabet | |
cover :: Alphabet -> String -> String -> Maybe String | |
cover sig x y = cover' sig xd yd (Set.singleton p) [("", p)] | |
where xd = toDFA x | |
yd = toDFA y | |
p = (dfaInit xd, dfaInit yd) | |
test :: | |
Bool {- ^ expected result -} -> | |
String {- ^ super -} -> | |
String {- ^ sub -} -> | |
IO () {- ^ report when super does not cover sub -} | |
test r sup sub = do | |
unless (r == isNothing (cover "" sup sub)) $ | |
putStrLn $ "FAIL: " ++ show r ++ " != cover " | |
++ show sup ++ " " ++ show sub | |
main :: IO () | |
main = | |
do test True "*" "a" | |
test True "*a" "a" | |
test True "*a" "aba" | |
test True "*a" "ab*a" | |
test True "*a" "a*ba" | |
test True "*a" "a*b*a" | |
test True "?*" "*a" | |
test True "*?" "*a" | |
test True "*?" "a*" | |
test True "?*" "a*" | |
test True "*" "?*" | |
test True "*?" "?*" | |
test True "?*" "*a*" | |
test True "?" "?" | |
test False "*a*" "?*" | |
test True "?" "a" | |
test False "a" "?" | |
test True "a" "a" | |
test False "a" "b" | |
test False "a?" "?a" | |
test False "?a" "*a" | |
test False "?" "*a" | |
test False "*foo*" "bar" | |
test True "*foo*" "foo" | |
test True "*foo*" "xfofoo" | |
test True "*" "*foo*" | |
test False "*foo*" "*" | |
test True "*" "*" | |
test False "?" "*" | |
test True "*?" "*?" | |
test True "?*" "*?" | |
test True "*?*?*?*" "*?????*" | |
test False "*??*??*??*" "*?????*" | |
test True "?*" "*a" | |
test True "???*" "*a*a*a" | |
test False "???*" "*a*a*" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment