Skip to content

Instantly share code, notes, and snippets.

@schar
Last active March 23, 2022 14:33
Show Gist options
  • Save schar/be42f76ab15c4d2987963d119983b4f4 to your computer and use it in GitHub Desktop.
Save schar/be42f76ab15c4d2987963d119983b4f4 to your computer and use it in GitHub Desktop.
{-# LANGUAGE OverloadedStrings #-}
module Regexp (
Regexp,
(<+>), (<>), star, zero,
match, anyc
) where
import Data.Char
import GHC.Exts (IsString (..))
import Prelude hiding ((<>))
-- Adapted from Pedro Vasconcelos, after Danvy & Nielsen "Defunctionalization
-- at work" (2001); Harper "Proof-directed debugging" (1999).
data Regexp = Zero -- nothing (0)
| One -- empty string (epsilon)
| Lit Char -- single character
| Plus Regexp Regexp -- union (+)
| Cat Regexp Regexp -- concatenation (.)
| Star Regexp -- repetition (*)
deriving Show
-- Smart constructors. These are what the students are actually exposed to.
infixl 6 <+>
infixl 7 <>
(<+>) :: Regexp -> Regexp -> Regexp
Zero <+> e = e
e <+> Zero = e
e1 <+> e2 = Plus e1 e2
(<>) :: Regexp -> Regexp -> Regexp
Zero <> _ = Zero
_ <> Zero = Zero
One <> e = e
e <> One = e
e1 <> e2 = Cat e1 e2
star :: Regexp -> Regexp
star Zero = One
star One = One
star (Star e) = Star e
star e = Star e
zero :: Regexp
zero = Zero
--
type Cont = String -> Bool
accept :: Regexp -> String -> Cont -> Bool -- worker function
accept Zero _ _ = False
accept One cs k = k cs
accept (Lit c) (c':cs) k = c==c' && k cs
accept (Lit c) [] k = False
accept (Cat e1 e2) cs k = accept e1 cs (\cs' -> accept e2 cs' k)
accept (Plus e1 e2) cs k = accept e1 cs k || accept e2 cs k
accept (Star e) cs k = acceptStar e cs k
where
acceptStar e cs k
= k cs || accept e cs (\cs' -> cs'/= cs && acceptStar e cs' k)
-- `acceptStar` piles up as much `accept e cs (\cs' -> ... accept e cs' (...))`
-- as needed to consume all of `cs`. The `cs'/=cs` condition ensures that some
-- of `cs` is consumed on each call -- useful when `e` is `Star One` and `cs`
-- is a non-matching string. The smart constructors actually take care of this
-- already, by normalizing `star One` to `One`.
match :: Regexp -> String -> Bool
match re s = accept re s null
instance IsString Regexp where
fromString cs = foldr ((<>) . Lit) One cs
fromChars :: [Char] -> Regexp
fromChars = foldr ((<+>) . fromString . (:[])) Zero
anyc :: Regexp
anyc = fromChars $ map chr [33..126] ++ " \n\r\t"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment