Skip to content

Instantly share code, notes, and snippets.

@joehendrix
Created November 4, 2020 16:56
Show Gist options
  • Save joehendrix/40aa23d31df474a86dcc922f05d92835 to your computer and use it in GitHub Desktop.
Save joehendrix/40aa23d31df474a86dcc922f05d92835 to your computer and use it in GitHub Desktop.
"DFA" based cover
{-# LANGUAGE OverloadedStrings #-}
import Control.Monad
import Data.Foldable
import Data.IntMap (IntMap)
import qualified Data.IntMap as IMap
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Maybe
import Data.Set (Set)
import qualified Data.Set as Set
import Data.String
-- | Maps indexing in string to character at that index.
type CharMap = IntMap Char
mkCharMap :: String -> CharMap
mkCharMap s = snd (foldl (\(n,m) ch -> (n+1, IMap.insert n ch m)) (0, IMap.empty) s)
type CharIndex = Int
-- | A DFA state is a set of character indices in string (or one more for end of string)
type DFAState = Set CharIndex
-- | Add a state to the dfa state, but close it.
closedState' :: CharMap -> DFAState -> CharIndex -> DFAState
closedState' m s st
| Set.member st s = s
| IMap.findWithDefault 'x' st m == '*' =
closedState' m (Set.insert st s) (st+1)
| otherwise = Set.insert st s
closedState :: CharMap -> CharIndex -> DFAState
closedState m = closedState' m Set.empty
-- | Edge summary describes all out edges of a DFAState
data EdgeSummary = ES { esCharMap :: !(Map Char DFAState)
, esDefault :: !DFAState
-- ^ States to reach on character that is not
-- in domain of char map
}
deriving (Show)
-- Make an edge summary a monoid so we can merge edges from different states.
instance Semigroup EdgeSummary where
x <> y =
let join = \_ xs ys -> Just (xs <> ys)
insLeft = fmap (mappend (esDefault y))
insRight = fmap (mappend (esDefault x))
in ES { esCharMap = Map.mergeWithKey join insLeft insRight (esCharMap x) (esCharMap y)
, esDefault = Set.union (esDefault x) (esDefault y)
}
instance Monoid EdgeSummary where
mempty = ES { esCharMap = Map.empty, esDefault = Set.empty }
mkStateEdge :: CharMap -> CharIndex -> EdgeSummary
mkStateEdge m s =
case IMap.lookup s m of
Just '*' -> mempty { esDefault = closedState m s }
Just '?' -> mempty { esDefault = closedState m (s + 1) }
Just c -> mempty { esCharMap = Map.singleton c (closedState m (s+1)) }
Nothing -> mempty
-- | Make an edge summary for a DFAState
mkEdgeSummary :: CharMap -> DFAState -> EdgeSummary
mkEdgeSummary m s = foldMap (mkStateEdge m) s
-- | Helper function to create DFA edge summaries
toDFA' :: CharMap
-> Map DFAState EdgeSummary
-> Set DFAState
-> Map DFAState EdgeSummary
toDFA' m edges r
| Set.null r = edges
| otherwise =
let (s, r') = Set.deleteFindMin r
es = mkEdgeSummary m s
-- Insert new starte into state to explore
insState :: Set DFAState -> DFAState -> Set DFAState
insState p s | Map.member s edges = p
| otherwise = Set.insert s p
-- Get next states to explore
next = foldl' insState (insState r' (esDefault es)) (esCharMap es)
in toDFA' m (Map.insert s es edges) next
-- | A DFA tuned to the string matching problem.
data DFA = DFA { dfaInit :: !DFAState
, dfaTrans :: !(Map DFAState EdgeSummary)
, dfaAcceptIndex :: !CharIndex -- ^ Index of accepting edge
}
deriving (Show)
-- | Construct a DFA from a string
toDFA :: String -> DFA
toDFA s =
let cm = mkCharMap s
i = closedState cm 0
m = toDFA' cm Map.empty (Set.singleton i)
in DFA i m (IMap.size cm)
-- | Return true if DFAState is an accepting state.
accept :: DFA -> DFAState -> Bool
accept m s = Set.member (dfaAcceptIndex m) s
type StatePair = (DFAState, DFAState)
-- | Merge two edge summaries to get map from known character to state pair.
mergeEdgeSummary :: EdgeSummary -> EdgeSummary -> Map Char StatePair
mergeEdgeSummary x y =
let join = \_ xs ys -> Just (xs, ys)
insLeft = fmap (\xs -> (xs, esDefault y))
insRight = fmap (\ys -> (esDefault x, ys))
in Map.mergeWithKey join insLeft insRight (esCharMap x) (esCharMap y)
-- | Set of characters in alphabet (use emptyset for any character).
newtype Alphabet = Alphabet (Set Char)
instance IsString Alphabet where
fromString s = Alphabet (Set.fromList s)
-- | Find character in alphabet that is not bound in map.
findMissing :: Alphabet -> Map Char s -> Maybe Char
findMissing (Alphabet sig) m
| Set.null sig = Just '?' -- Treat null as infinite
| Set.null s = Nothing
| otherwise = Just (Set.findMin s)
where s = Set.difference sig (Map.keysSet m)
-- | @cover' sig x y seen r@ looks for a string starting from a state in
-- @r@ that is accepted by @x@ and not by @y@.
cover' :: Alphabet -- ^ Alphabet
-> DFA
-> DFA
-> Set StatePair
-> [(String,StatePair)]
-- ^ Unexplored state pairs and string witnessing state pair is reachable.
-> Maybe String
cover' _ _x _y _s [] = Nothing
cover' sig x y seen ((wit,(xs,ys)):r)
| not (accept x xs), accept y ys = Just (reverse wit)
| otherwise =
let xe = Map.findWithDefault mempty xs (dfaTrans x)
ye = Map.findWithDefault mempty ys (dfaTrans y)
cm = mergeEdgeSummary xe ye
-- Insert 'c' and `p` is a string.
insSubsume (s, l) c p
| Set.member p s = (s,l)
| otherwise = (Set.insert p s, (c : wit, p) : l)
cmr = Map.foldlWithKey insSubsume (seen, r) cm
(seen', r') =
case findMissing sig cm of
Nothing -> cmr
Just d -> insSubsume cmr d (esDefault xe, esDefault ye)
in cover' sig x y seen' r'
-- | `cover0 sig x y` returns Nothing if every string matched by `y` is matched by `x`
-- and otherwise returns a witness that is matched by `y` and rejected by `x`.
--
-- The characters in sig are the complete alphabet
cover :: Alphabet -> String -> String -> Maybe String
cover sig x y = cover' sig xd yd (Set.singleton p) [("", p)]
where xd = toDFA x
yd = toDFA y
p = (dfaInit xd, dfaInit yd)
test ::
Bool {- ^ expected result -} ->
String {- ^ super -} ->
String {- ^ sub -} ->
IO () {- ^ report when super does not cover sub -}
test r sup sub = do
unless (r == isNothing (cover "" sup sub)) $
putStrLn $ "FAIL: " ++ show r ++ " != cover "
++ show sup ++ " " ++ show sub
main :: IO ()
main =
do test True "*" "a"
test True "*a" "a"
test True "*a" "aba"
test True "*a" "ab*a"
test True "*a" "a*ba"
test True "*a" "a*b*a"
test True "?*" "*a"
test True "*?" "*a"
test True "*?" "a*"
test True "?*" "a*"
test True "*" "?*"
test True "*?" "?*"
test True "?*" "*a*"
test True "?" "?"
test False "*a*" "?*"
test True "?" "a"
test False "a" "?"
test True "a" "a"
test False "a" "b"
test False "a?" "?a"
test False "?a" "*a"
test False "?" "*a"
test False "*foo*" "bar"
test True "*foo*" "foo"
test True "*foo*" "xfofoo"
test True "*" "*foo*"
test False "*foo*" "*"
test True "*" "*"
test False "?" "*"
test True "*?" "*?"
test True "?*" "*?"
test True "*?*?*?*" "*?????*"
test False "*??*??*??*" "*?????*"
test True "?*" "*a"
test True "???*" "*a*a*a"
test False "???*" "*a*a*"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment