Last active
August 22, 2021 19:42
-
-
Save GuiBrandt/d8257426eb9947d3c478e582288128fb to your computer and use it in GitHub Desktop.
Regular Expressions in haskell
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE StandaloneDeriving #-} | |
import Data.List (inits, tails) | |
import Control.Arrow ((***), (&&&)) | |
import Control.Monad (liftM2) | |
import System.Random (randomIO) | |
import qualified Text.Parsec as P | |
import Text.Parsec ((<|>)) | |
import Text.Parsec.Combinator | |
data Regexp a = Empty | |
| Literal a | |
| Kleene (Regexp a) | |
| Union (Regexp a) (Regexp a) | |
| Concat (Regexp a) (Regexp a) | |
instance Show a => Show (Regexp a) where | |
show Empty = "ε" | |
show (Literal c) = show c | |
show (Kleene r) = "(" ++ show r ++ ")*" | |
show (Union a b) = "(" ++ show a ++ "|" ++ show b ++ ")" | |
show (Concat a b) = "(" ++ show a ++ " " ++ show b ++ ")" | |
deriving instance Eq a => Eq (Regexp a) | |
splits :: [a] -> [([a], [a])] | |
splits = uncurry zip . (inits &&& tails) | |
accept :: Eq a => Regexp a -> [a] -> Bool | |
accept Empty = null | |
accept (Literal c) = (==) [c] | |
accept (Union a b) = liftM2 (||) (accept a) (accept b) | |
accept k@(Kleene r) = liftM2 (||) (accept Empty) (accept $ Concat r k) | |
accept (Concat a b) = or . fmap (uncurry (&&) . (accept a *** accept b)) . splits | |
generate :: Monad m => Regexp a -> m Bool -> m [a] | |
generate Empty = const $ return [] | |
generate (Literal c) = const $ return [c] | |
generate k@(Kleene r) = generate $ Union Empty (Concat r k) | |
generate (Union a b) = uncurry (=<<) . (flip generate &&& fmap (\choice -> if choice then a else b)) | |
generate (Concat a b) = (liftM2 . liftM2) (++) (generate a) (generate b) | |
parse :: String -> Either P.ParseError (Regexp Char) | |
parse = P.parse (regexp <|> Empty <$ P.eof) "Regexp" | |
where | |
parens = between (P.char '(') (P.char ')') | |
literal = Literal <$> P.alphaNum | |
kleene = Kleene <$> restricted <* P.char '*' | |
where restricted = literal <|> parens regexp | |
sequence = foldr1 Concat <$> many1 restricted | |
where restricted = P.try kleene <|> parens regexp <|> literal | |
union = foldr1 Union <$> sequence `sepBy1` P.char '|' | |
regexp = P.try union <|> sequence | |
main = do | |
input <- getLine | |
case parse input of | |
Left err -> print err | |
Right regex -> do | |
print regex | |
putStr "Example: " | |
gen <- generate regex randomIO | |
print gen | |
sequence_ . repeat $ | |
do line <- getLine | |
print $ accept regex line |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment