Last active
March 23, 2022 14:33
-
-
Save schar/be42f76ab15c4d2987963d119983b4f4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE OverloadedStrings #-} | |
module Regexp ( | |
Regexp, | |
(<+>), (<>), star, zero, | |
match, anyc | |
) where | |
import Data.Char | |
import GHC.Exts (IsString (..)) | |
import Prelude hiding ((<>)) | |
-- Adapted from Pedro Vasconcelos, after Danvy & Nielsen "Defunctionalization | |
-- at work" (2001); Harper "Proof-directed debugging" (1999). | |
data Regexp = Zero -- nothing (0) | |
| One -- empty string (epsilon) | |
| Lit Char -- single character | |
| Plus Regexp Regexp -- union (+) | |
| Cat Regexp Regexp -- concatenation (.) | |
| Star Regexp -- repetition (*) | |
deriving Show | |
-- Smart constructors. These are what the students are actually exposed to. | |
infixl 6 <+> | |
infixl 7 <> | |
(<+>) :: Regexp -> Regexp -> Regexp | |
Zero <+> e = e | |
e <+> Zero = e | |
e1 <+> e2 = Plus e1 e2 | |
(<>) :: Regexp -> Regexp -> Regexp | |
Zero <> _ = Zero | |
_ <> Zero = Zero | |
One <> e = e | |
e <> One = e | |
e1 <> e2 = Cat e1 e2 | |
star :: Regexp -> Regexp | |
star Zero = One | |
star One = One | |
star (Star e) = Star e | |
star e = Star e | |
zero :: Regexp | |
zero = Zero | |
-- | |
type Cont = String -> Bool | |
accept :: Regexp -> String -> Cont -> Bool -- worker function | |
accept Zero _ _ = False | |
accept One cs k = k cs | |
accept (Lit c) (c':cs) k = c==c' && k cs | |
accept (Lit c) [] k = False | |
accept (Cat e1 e2) cs k = accept e1 cs (\cs' -> accept e2 cs' k) | |
accept (Plus e1 e2) cs k = accept e1 cs k || accept e2 cs k | |
accept (Star e) cs k = acceptStar e cs k | |
where | |
acceptStar e cs k | |
= k cs || accept e cs (\cs' -> cs'/= cs && acceptStar e cs' k) | |
-- `acceptStar` piles up as much `accept e cs (\cs' -> ... accept e cs' (...))` | |
-- as needed to consume all of `cs`. The `cs'/=cs` condition ensures that some | |
-- of `cs` is consumed on each call -- useful when `e` is `Star One` and `cs` | |
-- is a non-matching string. The smart constructors actually take care of this | |
-- already, by normalizing `star One` to `One`. | |
match :: Regexp -> String -> Bool | |
match re s = accept re s null | |
instance IsString Regexp where | |
fromString cs = foldr ((<>) . Lit) One cs | |
fromChars :: [Char] -> Regexp | |
fromChars = foldr ((<+>) . fromString . (:[])) Zero | |
anyc :: Regexp | |
anyc = fromChars $ map chr [33..126] ++ " \n\r\t" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment