Skip to content

Instantly share code, notes, and snippets.

@fizbin

fizbin/regexequiv.hs

Last active Dec 27, 2020
Embed
What would you like to do?
Code to determine whether two regular expressions are equivalent, and if not to find a distinguishing string
module Main where
-- Takes two arguments in a limited regex-like language, and tells if
-- they are equivalent. This equivalence is shown by either saying
-- "there is no string which matches one regex but not the other" or
-- by giving a string which matches one but not the other.
-- Prints out:
-- First arg. parsed
-- Second arg. parsed
-- Result of trying to find distinguishing string assuming only "abcde"
-- is the regex alphabet
-- Result of trying to find distinguishing string assuming all of Char
-- is the regex alphabet
-- The language accepted is basically standard egrep regex syntax if the only
-- metacharacters were . * | ( ) \
-- plus then \N means Null (match nothing), \E means "everything" (aka .*)
-- Also, add & as a metacharacter to mean "And"
-- add ! as a postfix operator to mean "Not"
-- Note: no '[abc]' support. Use "(a|b|c)"; for [^abc] use "(.&(a|b|c)!)"
import System.Environment (getArgs)
import qualified Data.Set as S
import qualified Data.Map.Strict as M
import qualified Data.Map.Merge.Strict as M
import Data.Maybe (listToMaybe)
import Control.Applicative
-- import Debug.Trace
data Regex alphabet =
Null
| Everything
| Epsilon
| Any -- Any single char; used for "."
| C alphabet
| Star (Regex alphabet)
| Not (Regex alphabet)
| Seq (Regex alphabet) (Regex alphabet)
| Or (Regex alphabet) (Regex alphabet)
| And (Regex alphabet) (Regex alphabet)
deriving (Eq, Ord, Show, Read)
nullable :: Regex a -> Bool
nullable Null = False
nullable Everything = True
nullable Epsilon = True
nullable Any = False
nullable (C _) = False
nullable (Star _) = True
nullable (Not x) = not (nullable x)
nullable (Seq a b) = nullable a && nullable b
nullable (Or a b) = nullable a || nullable b
nullable (And a b) = nullable a && nullable b
mkNot :: Regex a -> Regex a
mkNot (Not x) = x
mkNot Null = Everything
mkNot Everything = Null
mkNot x = Not x
mkStar :: Regex a -> Regex a
mkStar Null = Epsilon
mkStar Everything = Everything
mkStar Any = Everything
mkStar Epsilon = Epsilon
mkStar (Or Epsilon x) = mkStar x
mkStar y@(Star _) = y
mkStar x = Star x
mkSeq :: Regex a -> Regex a -> Regex a
mkSeq Null _ = Null
mkSeq _ Null = Null
mkSeq Epsilon x = x
mkSeq x Epsilon = x
mkSeq (Seq x y) z = mkSeq x (mkSeq y z)
mkSeq x y = Seq x y
mkOr :: (Ord a) => Regex a -> Regex a -> Regex a
mkOr Null x = x
mkOr Everything _ = Everything
mkOr (Or x y) z = mkOr x (mkOr y z)
mkOr x y@(Or _ _) = Or x y -- otherwise, it's possible to infinite loop
mkOr x y = case compare x y of
GT -> mkOr y x
EQ -> x
LT -> Or x y
mkAnd :: (Ord a) => Regex a -> Regex a -> Regex a
mkAnd Null _ = Null
mkAnd Everything x = x
mkAnd (And x y) z = mkAnd x (mkAnd y z)
mkAnd x y = case compare x y of
GT -> mkAnd y x
EQ -> x
LT -> And x y
simplify :: Ord a => Regex a -> Regex a
simplify (Or x y) = mkOr (simplify x) (simplify y)
simplify (And x y) = mkAnd (simplify x) (simplify y)
simplify (Seq x y) = mkSeq (simplify x) (simplify y)
simplify (Star x) = mkStar (simplify x)
simplify (Not x) = mkNot (simplify x)
simplify x = x
mergeWithDefault :: Ord k => (a -> b -> c) -> a -> b
-> M.Map k a -> M.Map k b -> M.Map k c
mergeWithDefault mergeFn default1 default2 =
M.merge
(M.mapMissing $ const (`mergeFn` default2))
(M.mapMissing $ const (default1 `mergeFn`))
(M.zipWithMatched $ const mergeFn)
-- some function combiners that are useful:
(***) :: (a -> b) -> (c -> d) -> (a, c) -> (b, d)
f *** g = \(a1, a2) -> (f a1, g a2)
(****) :: (a -> b1 -> b2) -> (c -> d1 -> d2) -> (a, c) -> (b1, d1) -> (b2, d2)
f **** g = \(a1, a2) (b1, b2) -> (f a1 b1, g a2 b2)
-- Returns a derivative for "some other letter not in map", and a map of letter to derivative rel. to that letter
derivatives :: (Ord a) => Regex a -> (Regex a, M.Map a (Regex a))
derivatives Any = (Epsilon, M.empty)
derivatives Epsilon = (Null, M.empty)
derivatives Null = (Null, M.empty)
derivatives Everything = (Everything, M.empty)
derivatives (C a) = (Null, M.singleton a Epsilon)
derivatives (Seq x y) =
let dx = (`mkSeq` y) *** M.map (`mkSeq` y) $ derivatives x
dy = derivatives y
in if nullable x
then (mkOr **** mergeWithDefault mkOr (fst dx) (fst dy)) dx dy
else dx
derivatives (Star x) =
(`mkSeq` Star x) *** M.map (`mkSeq` Star x) $ derivatives x
derivatives (Or x y) =
let dx = derivatives x
dy = derivatives y
in (mkOr **** mergeWithDefault mkOr (fst dx) (fst dy)) dx dy
derivatives (And x y) =
let dx = derivatives x
dy = derivatives y
in (mkAnd **** mergeWithDefault mkAnd (fst dx) (fst dy)) dx dy
derivatives (Not x) = mkNot *** M.map mkNot $ derivatives x
toSeq :: [a] -> Regex a
toSeq = foldr (mkSeq . C) Epsilon
findDistinguishingString :: Ord a =>
([a] -> Maybe a) -> Regex a -> Regex a -> Maybe [a]
findDistinguishingString findNewChar = go S.empty
where
go seen r1 r2 | (r1, r2) `S.member` seen = Nothing
go _ r1 r2 | r1 == r2 = Nothing
go _ r1 r2 | nullable r1 /= nullable r2 = Just []
go seen r1 r2 =
let (dr1def, dr1map) = derivatives r1
(dr2def, dr2map) = derivatives r2
findrest res1 res2 =
go ((r1, r2) `S.insert` seen) res1 res2
charxmp k res1 res2 = (k:) <$> findrest res1 res2
defxmp = do xmpfst <- findNewChar (M.keys dr1map ++ M.keys dr2map)
charxmp xmpfst dr1def dr2def
in defxmp <|>
listToMaybe (M.elems $ M.merge
(M.mapMaybeMissing $ \k x -> charxmp k x dr2def)
(M.mapMaybeMissing $ \k y -> charxmp k dr1def y)
(M.zipWithMaybeMatched charxmp)
dr1map dr2map)
findMyNewChar :: [Char] -> Maybe Char
findMyNewChar xs
| 'a' `notElem` xs = Just 'a'
| 'b' `notElem` xs = Just 'b'
| 'c' `notElem` xs = Just 'c'
| 'd' `notElem` xs = Just 'd'
| 'e' `notElem` xs = Just 'e'
| otherwise = Nothing
findMyNewChar' :: [Char] -> Maybe Char
findMyNewChar' xs = Just $ succ $ maximum ('@':xs)
parse :: String -> Regex Char
parse str' = go [] [Epsilon] str'
where
unwind [] vals = vals
unwind ('&':ops) (val1:val2:vals) = unwind ops (mkAnd val2 val1:vals)
unwind ('|':ops) (val1:val2:vals) = unwind ops (mkOr val2 val1:vals)
unwind ('s':ops) (val1:val2:vals) = unwind ops (mkSeq val2 val1:vals)
unwind (c:_) _ = error ("Unknown op character " ++ [c])
go opstack valstack "" =
-- trace ("go: has " ++ show opstack ++ " " ++ show valstack) $
if 1 + length opstack /= length valstack
then error ("Error parsing " ++ str')
else head $ unwind opstack valstack
go opstack valstack ('\\':'N':xs) =
go ('s':opstack) (Null:valstack) xs
go opstack valstack ('\\':'E':xs) =
go ('s':opstack) (Everything:valstack) xs
go opstack valstack ('\\':x:xs) =
go ('s':opstack) (C x:valstack) xs
go opstack valstack ('(':xs) =
go ('(':opstack) (Epsilon:valstack) xs
go opstack valstack ('|':xs) =
let (skipops, restops) = span (`elem` "&s") opstack
in go ('|':restops) (Epsilon:unwind skipops valstack) xs
go opstack valstack ('&':xs) =
let (skipops, restops) = span (`elem` "s") opstack
in go ('&':restops) (Epsilon:unwind skipops valstack) xs
go opstack valstack (')':xs) =
-- trace ("): has " ++ show opstack ++ " " ++ show valstack) $
let (myops, othops) = span (/= '(') opstack
in if null othops then error ("Extra closing paren in " ++ str')
else go ('s':tail othops) (unwind myops valstack) xs
go opstack valstack ('*':xs) =
go opstack (mkStar (head valstack) : tail valstack) xs
go opstack valstack ('!':xs) =
go opstack (mkNot (head valstack) : tail valstack) xs
go opstack valstack ('.':xs) =
go ('s':opstack) (Any:valstack) xs
go opstack valstack (x:xs) =
go ('s':opstack) (C x:valstack) xs
main :: IO ()
main = do
args' <- getArgs
let args = args' ++ [ ".", ".."]
args1 = head args
args2 = head $ tail args
re1 = simplify $ parse args1
re2 = simplify $ parse args2
print re1
print re2
print $ findDistinguishingString findMyNewChar re1 re2
print $ findDistinguishingString findMyNewChar' re1 re2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment