Created
May 31, 2016 01:26
-
-
Save tippenein/a12699c8fe4a986563a99a93de2169ee to your computer and use it in GitHub Desktop.
an incomplete regex implementation (moved from old repo)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- regex implementation | |
module Main where | |
import Control.Exception | |
import Prelude hiding (max,min) | |
data Atom = Literal Char | |
| OneOf String | |
| NoneOf String | |
| Any | |
deriving Show | |
data QuantifiedAtom = QuantifiedAtom Atom Quantifier | |
deriving Show | |
data Quantifier = AtLeast Int | |
| Between Int Int | |
deriving Show | |
data Regex = Regex [QuantifiedAtom] | |
deriving Show | |
quantMin :: Quantifier -> Int | |
quantMin (AtLeast n) = n | |
quantMin (Between min _) = min | |
maxReached :: Quantifier -> Int -> Bool | |
maxReached (AtLeast _) _ = False | |
maxReached (Between _ max) n = n >= max | |
reMatch :: Regex -> String -> Bool | |
reMatch (Regex []) _ = True | |
reMatch (Regex (qa:qas)) cs = | |
case matchQA qa cs of | |
Just leftovers -> reMatch (Regex qas) leftovers | |
_ -> False | |
matchAtom :: Atom -> String -> Maybe String | |
matchAtom _ "" = Nothing | |
matchAtom Any (_:cs) = Just cs | |
matchAtom (Literal c) cs = if c==head cs then Just $ tail cs else Nothing | |
matchAtom (OneOf alts) cs = if head cs `elem` alts | |
then Just $ tail cs | |
else Nothing | |
matchAtom (NoneOf alts) cs = if head cs `elem` alts | |
then Nothing | |
else Just $ tail cs | |
matchQA :: QuantifiedAtom -> String -> Maybe String | |
matchQA = matchQA' 0 | |
where | |
matchQA' numMatched (QuantifiedAtom atom quant) cs = | |
if maxReached quant numMatched | |
then Just cs | |
else | |
case matchAtom atom cs of | |
Just leftovers -> matchQA' (numMatched+1) (QuantifiedAtom atom quant) leftovers | |
Nothing -> if numMatched >= quantMin quant then Just cs else Nothing | |
parseAtom :: String -> (Atom, String) | |
parseAtom ('[':'^':cs) = (NoneOf alts, cs') | |
where | |
alts = insideBracket cs | |
cs' = afterBracket cs | |
parseAtom ('[':cs) = (OneOf alts, cs') | |
where | |
alts = insideBracket cs | |
cs' = afterBracket cs | |
parseAtom ('.':cs) = (Any, cs) | |
parseAtom (c:cs) = (Literal c, cs) | |
parseAtom "" = error "parseAtom does not accept empty string" | |
insideBracket :: String -> String | |
insideBracket = takeWhile (/= ']') | |
afterBracket :: String -> String | |
afterBracket cs = tail $ dropWhile (/= ']') cs | |
parseQuantifier :: Char -> Maybe Quantifier | |
parseQuantifier '?' = Just $ Between 0 1 | |
parseQuantifier '*' = Just $ AtLeast 0 | |
parseQuantifier '+' = Just $ AtLeast 1 | |
parseQuantifier _ = Nothing | |
parseQuantifiedAtom :: String -> (QuantifiedAtom, String) | |
parseQuantifiedAtom cs = (QuantifiedAtom atom quant, leftovers) | |
where | |
(atom, cs') = parseAtom cs | |
(quant, leftovers) = | |
case cs' of | |
(c:cs'') -> case parseQuantifier c of | |
Just q -> (q, cs'') | |
Nothing -> (Between 1 1, (c:cs'')) | |
_ -> (Between 1 1, cs') | |
compile' :: String -> [QuantifiedAtom] -> [QuantifiedAtom] | |
compile' "" acc = reverse acc | |
compile' s acc = compile' restOfString (qa:acc) | |
where | |
(qa, restOfString) = parseQuantifiedAtom s | |
compile :: String -> Regex | |
compile rstr = Regex $ compile' rstr [] | |
test :: IO () | |
test = do | |
let reMatch' = reMatch . compile | |
assert (reMatch' ".at" "bat") putStr "." | |
assert (reMatch' "[hc]at" "hat") putStr "." | |
assert (reMatch' "[hc]at" "cat") putStr "." | |
assert (reMatch' "[^b]at" "hat") putStr "." | |
assert (not (reMatch' "[^b]at" "bat")) putStr "." | |
assert (not (reMatch' ".at" "at")) putStr "." | |
putStrLn "" | |
main :: IO () | |
main = return () |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment