Skip to content

Instantly share code, notes, and snippets.

@adamse
Created July 26, 2021 16:02
Show Gist options
  • Save adamse/6dd64c9cbcc6d6dbc68fd8a30469d841 to your computer and use it in GitHub Desktop.
Save adamse/6dd64c9cbcc6d6dbc68fd8a30469d841 to your computer and use it in GitHub Desktop.
{-# language OverloadedStrings #-}
import Data.List (sort, nub, transpose)
import Data.String
-- keep:
--
-- empty -- empty string
-- a b -- sequence of regexes
-- <char> -- match char
-- ? -- zero or one
-- * -- zero or more of the previous thing
-- | -- alternatives
data Regex
= Empty -- match empty string
| Any
| End
| C Char -- match character
| ZeroOrOne Regex
| ZeroOrMore Regex
| Sequence Regex Regex
| Alternative Regex Regex
deriving (Show)
instance IsString Regex where
fromString = mconcat . map C
instance Monoid Regex where
mempty = Empty
instance Semigroup Regex where
(<>) = Sequence
(<|>) = Alternative
tests =
[ exact "" ""
, exact ("a" <|> "b") "a"
, exact ("a" <|> "b") "b"
, exact (Sequence (Alternative (C 'a') (Sequence (C 'a') (C 'a'))) (C 'b')) "aab"
, exact (Sequence (Alternative (C 'a') (Sequence (C 'a') (C 'a'))) (C 'b')) "ab"
, exact (Sequence (ZeroOrOne (C 'a')) (C 'a')) "a"
, exact (Sequence (ZeroOrOne (C 'a')) (C 'a')) "aa"
, exact (Sequence (ZeroOrMore (C 'a')) (C 'a')) "a"
, exact (Sequence (ZeroOrMore (C 'a')) (C 'a')) "aa"
, exact (C 'a') ""
, exact (ZeroOrMore (Alternative Empty ((C 'b')))) "bb"
, exact End ""
, exact (Sequence (C 'c') End) "c"
, exact Empty "asdf"
]
interleave = concat . transpose
exact :: Regex -> String -> Bool
-- check if the empty string is in the leftovers
exact re string = "" `elem` work re string
match :: Regex -> String -> Bool
match re string = work re string /= []
-- [] = no match
work :: Regex -> String -> [String]
work r string = case r of
-- eps
Empty -> [string]
-- $
End ->
if string == ""
then [""]
else []
-- .
Any -> case string of
(_:rest) -> [rest]
_ -> []
-- <char>
C char -> case string of
(input_char:rest)
| input_char == char -> [rest]
_ -> []
-- r1 | r2
Alternative r1 r2 ->
interleave [work r1 string, work r2 string]
-- r1 r2
Sequence r1 r2 ->
-- try r2 on all possible leftovers for r1
interleave (map (work r2) (work r1 string))
-- r ?
ZeroOrOne r ->
string : work r string
-- r *
ZeroOrMore r ->
string : work (Sequence r (ZeroOrMore r)) string
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment