Created
July 26, 2021 16:02
-
-
Save adamse/6dd64c9cbcc6d6dbc68fd8a30469d841 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# language OverloadedStrings #-} | |
import Data.List (sort, nub, transpose) | |
import Data.String | |
-- keep: | |
-- | |
-- empty -- empty string | |
-- a b -- sequence of regexes | |
-- <char> -- match char | |
-- ? -- zero or one | |
-- * -- zero or more of the previous thing | |
-- | -- alternatives | |
data Regex | |
= Empty -- match empty string | |
| Any | |
| End | |
| C Char -- match character | |
| ZeroOrOne Regex | |
| ZeroOrMore Regex | |
| Sequence Regex Regex | |
| Alternative Regex Regex | |
deriving (Show) | |
instance IsString Regex where | |
fromString = mconcat . map C | |
instance Monoid Regex where | |
mempty = Empty | |
instance Semigroup Regex where | |
(<>) = Sequence | |
(<|>) = Alternative | |
tests = | |
[ exact "" "" | |
, exact ("a" <|> "b") "a" | |
, exact ("a" <|> "b") "b" | |
, exact (Sequence (Alternative (C 'a') (Sequence (C 'a') (C 'a'))) (C 'b')) "aab" | |
, exact (Sequence (Alternative (C 'a') (Sequence (C 'a') (C 'a'))) (C 'b')) "ab" | |
, exact (Sequence (ZeroOrOne (C 'a')) (C 'a')) "a" | |
, exact (Sequence (ZeroOrOne (C 'a')) (C 'a')) "aa" | |
, exact (Sequence (ZeroOrMore (C 'a')) (C 'a')) "a" | |
, exact (Sequence (ZeroOrMore (C 'a')) (C 'a')) "aa" | |
, exact (C 'a') "" | |
, exact (ZeroOrMore (Alternative Empty ((C 'b')))) "bb" | |
, exact End "" | |
, exact (Sequence (C 'c') End) "c" | |
, exact Empty "asdf" | |
] | |
interleave = concat . transpose | |
exact :: Regex -> String -> Bool | |
-- check if the empty string is in the leftovers | |
exact re string = "" `elem` work re string | |
match :: Regex -> String -> Bool | |
match re string = work re string /= [] | |
-- [] = no match | |
work :: Regex -> String -> [String] | |
work r string = case r of | |
-- eps | |
Empty -> [string] | |
-- $ | |
End -> | |
if string == "" | |
then [""] | |
else [] | |
-- . | |
Any -> case string of | |
(_:rest) -> [rest] | |
_ -> [] | |
-- <char> | |
C char -> case string of | |
(input_char:rest) | |
| input_char == char -> [rest] | |
_ -> [] | |
-- r1 | r2 | |
Alternative r1 r2 -> | |
interleave [work r1 string, work r2 string] | |
-- r1 r2 | |
Sequence r1 r2 -> | |
-- try r2 on all possible leftovers for r1 | |
interleave (map (work r2) (work r1 string)) | |
-- r ? | |
ZeroOrOne r -> | |
string : work r string | |
-- r * | |
ZeroOrMore r -> | |
string : work (Sequence r (ZeroOrMore r)) string |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment