Skip to content

Instantly share code, notes, and snippets.

@tippenein
Created May 31, 2016 01:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tippenein/a12699c8fe4a986563a99a93de2169ee to your computer and use it in GitHub Desktop.
Save tippenein/a12699c8fe4a986563a99a93de2169ee to your computer and use it in GitHub Desktop.
an incomplete regex implementation (moved from old repo)
-- regex implementation
module Main where
import Control.Exception
import Prelude hiding (max,min)
data Atom = Literal Char
| OneOf String
| NoneOf String
| Any
deriving Show
data QuantifiedAtom = QuantifiedAtom Atom Quantifier
deriving Show
data Quantifier = AtLeast Int
| Between Int Int
deriving Show
data Regex = Regex [QuantifiedAtom]
deriving Show
quantMin :: Quantifier -> Int
quantMin (AtLeast n) = n
quantMin (Between min _) = min
maxReached :: Quantifier -> Int -> Bool
maxReached (AtLeast _) _ = False
maxReached (Between _ max) n = n >= max
reMatch :: Regex -> String -> Bool
reMatch (Regex []) _ = True
reMatch (Regex (qa:qas)) cs =
case matchQA qa cs of
Just leftovers -> reMatch (Regex qas) leftovers
_ -> False
matchAtom :: Atom -> String -> Maybe String
matchAtom _ "" = Nothing
matchAtom Any (_:cs) = Just cs
matchAtom (Literal c) cs = if c==head cs then Just $ tail cs else Nothing
matchAtom (OneOf alts) cs = if head cs `elem` alts
then Just $ tail cs
else Nothing
matchAtom (NoneOf alts) cs = if head cs `elem` alts
then Nothing
else Just $ tail cs
matchQA :: QuantifiedAtom -> String -> Maybe String
matchQA = matchQA' 0
where
matchQA' numMatched (QuantifiedAtom atom quant) cs =
if maxReached quant numMatched
then Just cs
else
case matchAtom atom cs of
Just leftovers -> matchQA' (numMatched+1) (QuantifiedAtom atom quant) leftovers
Nothing -> if numMatched >= quantMin quant then Just cs else Nothing
parseAtom :: String -> (Atom, String)
parseAtom ('[':'^':cs) = (NoneOf alts, cs')
where
alts = insideBracket cs
cs' = afterBracket cs
parseAtom ('[':cs) = (OneOf alts, cs')
where
alts = insideBracket cs
cs' = afterBracket cs
parseAtom ('.':cs) = (Any, cs)
parseAtom (c:cs) = (Literal c, cs)
parseAtom "" = error "parseAtom does not accept empty string"
insideBracket :: String -> String
insideBracket = takeWhile (/= ']')
afterBracket :: String -> String
afterBracket cs = tail $ dropWhile (/= ']') cs
parseQuantifier :: Char -> Maybe Quantifier
parseQuantifier '?' = Just $ Between 0 1
parseQuantifier '*' = Just $ AtLeast 0
parseQuantifier '+' = Just $ AtLeast 1
parseQuantifier _ = Nothing
parseQuantifiedAtom :: String -> (QuantifiedAtom, String)
parseQuantifiedAtom cs = (QuantifiedAtom atom quant, leftovers)
where
(atom, cs') = parseAtom cs
(quant, leftovers) =
case cs' of
(c:cs'') -> case parseQuantifier c of
Just q -> (q, cs'')
Nothing -> (Between 1 1, (c:cs''))
_ -> (Between 1 1, cs')
compile' :: String -> [QuantifiedAtom] -> [QuantifiedAtom]
compile' "" acc = reverse acc
compile' s acc = compile' restOfString (qa:acc)
where
(qa, restOfString) = parseQuantifiedAtom s
compile :: String -> Regex
compile rstr = Regex $ compile' rstr []
test :: IO ()
test = do
let reMatch' = reMatch . compile
assert (reMatch' ".at" "bat") putStr "."
assert (reMatch' "[hc]at" "hat") putStr "."
assert (reMatch' "[hc]at" "cat") putStr "."
assert (reMatch' "[^b]at" "hat") putStr "."
assert (not (reMatch' "[^b]at" "bat")) putStr "."
assert (not (reMatch' ".at" "at")) putStr "."
putStrLn ""
main :: IO ()
main = return ()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment