Skip to content

Instantly share code, notes, and snippets.

@mniip
Last active August 29, 2015 13:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mniip/9703507 to your computer and use it in GitHub Desktop.
Save mniip/9703507 to your computer and use it in GitHub Desktop.
A regex-like pattern matcher operating on arbitrary data types.
module Patterns
(
captures, match, residue,
maybeCaptures, maybeMatch, gsub,
capture, captureMap, captureGuard, captureEq, lookAhead, lookAheadNot,
attach, orElse, tryOne, greedySome, lazySome, greedyMany, lazyMany,
times,
always, never, end, anything, value, notValue, range, anyOf, noneOf, lambda,
list, balanced,
numeric, hexadecimal, lowercase, uppercase, alpha, alphanumeric, ascii,
whitespace
) where
-- | The 'Match' type encapsulates a possible expression match. Expressions
-- always return a list of matches, from which, the first is the usually
-- required one.
-- Match captures match residue
data Match a = Match [[a]] [a] [a] deriving Show
-- | 'cmatch' creates a Match with no captures
cmatch :: [a] -> [a] -> Match a
cmatch = Match []
-- | 'mprepend' prepends a character to a list of matches
mprepend :: a -> [Match a] -> [Match a]
mprepend d = map (\(Match c m r) -> Match c (d:m) r)
-- | 'captures' returns a Match's captures
captures :: Match a -> [[a]]
captures (Match c _ _) = c
-- | 'match' returns part of the string consumed by the pattern
match :: Match a -> [a]
match (Match _ m _) = m
-- | 'residue' returns a Match's residue, that is, part of the string not
-- consumed by the pattern yet
residue :: Match a -> [a]
residue (Match _ _ r) = r
-- | 'maybeCaptures' matches a pattern against a string, and returns captures
-- created by the least backtracked Match, if any
maybeCaptures :: Pattern a -> [a] -> Maybe [[a]]
maybeCaptures f ds = case f ds of
[] -> Nothing
(r:_) -> Just $ captures r
-- | 'maybeMatch' matches a pattern against a string, ignoring any captures, and
-- returns part of the string consumed by the least backtracked Match, if any
maybeMatch :: Pattern a -> [a] -> Maybe [a]
maybeMatch f ds = case f ds of
[] -> Nothing
(r:_) -> Just $ match r
-- | 'gsub f l d' finds all non-overlapping (least-backtraced) occurences of 'f'
-- in 'd', pipes their captures through 'l', and then puts back into 'd' in
-- respective places
gsub :: Pattern a -> ([[a]] -> [a]) -> [a] -> [a]
gsub _ _ [] = []
gsub f l dd@(d:ds) = case f dd of
[] -> d:(gsub f l ds)
(Match c _ r:_) -> (l c) ++ (gsub f l r)
-- | The 'Pattern' type encapsulates an expression. It is a function, which,
-- when being fed an input string, returns a list of possible ways to parse that
-- input string, with the first attempted parse being first and so on.
type Pattern a = [a] -> [Match a]
-- -----------------------------------------------------------------------------
-- Pattern operators
-- | 'capture f' captures whatever 'f' has matched
capture :: Pattern a -> Pattern a
capture f ds = map (\(Match c h t) -> (Match (h:c) h t)) $ f ds
-- | 'captureMap l f' changed 'f's captures using 'l'
captureMap :: ([[a]] -> [[a]]) -> Pattern a -> Pattern a
captureMap l f ds = map (\(Match c h t) -> Match (l c) h t) $ f ds
-- | 'captureGuard l f' filters out those matches of 'f', for which 'l' returns
-- True
captureGuard :: ([[a]] -> Bool) -> Pattern a -> Pattern a
captureGuard l f ds = filter (\(Match c _ _) -> l c) $ f ds
-- | 'captureEq a b f' filters those matches of 'f', for which a'th capture
-- equals b'th capture. If one of the required captures is not present, the
-- filter returns false. If both aren't, it returns True.
captureEq :: (Eq a) => Int -> Int -> Pattern a -> Pattern a
captureEq a b = captureGuard (\c -> (maybeIndex a c) == (maybeIndex b c))
where
maybeIndex :: Int -> [a] -> Maybe a
maybeIndex _ [] = Nothing
maybeIndex 0 (x:_) = Just x
maybeIndex n (x:c) = maybeIndex (n - 1) c
-- | 'lookAhead f' matches empty string if 'f' matches
lookAhead :: Pattern a -> Pattern a
lookAhead f ds = case f ds of
[] -> []
_ -> [cmatch [] ds]
-- | 'lookAheadNot f' matches empty string if 'f' does not match
lookAheadNot :: Pattern a -> Pattern a
lookAheadNot f ds = case f ds of
[] -> [cmatch [] ds]
_ -> []
-- | 'f `attach` g' matches 'f' followed by 'g'
infixr 6 `attach`
attach :: Pattern a -> Pattern a -> Pattern a
attach f g ds = [ Match (c1 ++ c2) (m1 ++ m2) r2 |
Match c1 m1 r1 <- f ds,
Match c2 m2 r2 <- g r1]
-- | 'f `orElse` g' matches 'f' and in case of failure matches 'g'
infixr 5 `orElse`
orElse :: Pattern a -> Pattern a -> Pattern a
orElse f g ds = (f ds) ++ (g ds)
-- | 'tryOne f' attempts to match 'f' once, and upon failure to do so, matches
-- empty string
tryOne :: Pattern a -> Pattern a
tryOne f ds = (f ds) ++ [cmatch [] ds]
-- | 'greedySome f' attempts to match 'f' as many times as possible, then one
-- time less, and so on, excluding the empty match. Backtracks matches that
-- matched 0 characters. If the input list is infinite and therefore given
-- pattern matches infinitely, neither of possible results will ever evaluate
greedySome :: Pattern a -> Pattern a
greedySome f ds = case f ds of
[] -> []
r -> [ Match (c1 ++ c2) (m1 ++ m2) r2 |
Match c1 m1 r1 <- r, not $ null m1,
Match c2 m2 r2 <- greedySome f r1] ++ r
-- | 'lazySome f' attempts to match 'f' 1 time, then 2 times, until it is
-- impossible to match any more. Backtracks matches that matched 0 characters.
lazySome :: Pattern a -> Pattern a
lazySome f ds = let r = f ds in
r ++ [ Match (c1 ++ c2) (m1 ++ m2) r2 |
Match c1 m1 r1 <- r, not $ null m1,
Match c2 m2 r2 <- lazySome f r1]
-- | 'greedyMany f' attempts to match 'f' as many times as possible, then one
-- time less, and so on, up to and including the empty match. Backtracks
-- matches that matched 0 characters. If the input list is infinite and
-- therefore given pattern matches infinitely, neither of possible results
-- will ever evaluate
greedyMany :: Pattern a -> Pattern a
greedyMany f ds = case f ds of
[] -> [cmatch [] ds]
r -> [ Match (c1 ++ c2) (m1 ++ m2) r2 |
Match c1 m1 r1 <- r, not $ null m1,
Match c2 m2 r2 <- greedyMany f r1] ++ [cmatch [] ds]
-- | 'lazyMany f' attempts to match 'f' 0 times, then 1 time, until it is
-- impossible to match any more. Backtracks matches that matched 0 characters.
lazyMany :: Pattern a -> Pattern a
lazyMany f ds = case f ds of
[] -> [cmatch [] ds]
r -> (cmatch [] ds) : [ Match (c1 ++ c2) (m1 ++ m2) r2 |
Match c1 m1 r1 <- r, not $ null m1,
Match c2 m2 r2 <- lazyMany f r1]
-- | 'times n f' matches 'f', repeated exactly 'n' times
times :: Int -> Pattern a -> Pattern a
times 0 _ = always
times n f = foldr1 attach $ replicate n f
-- -----------------------------------------------------------------------------
-- Patterns
-- | 'always' matches an empty string
always :: Pattern a
always ds = [cmatch [] ds]
-- | 'never' never matches
never :: Pattern a
never _ = []
-- | 'end' matches the end of input
end :: Pattern a
end [] = [cmatch [] []]
end _ = []
-- | 'anything' matches any value in input's head
anything :: Pattern a
anything [] = []
anything (d:ds) = [cmatch [d] ds]
-- | 'value x' matches input's head if it is 'x'
value :: (Eq a) => a -> Pattern a
value _ [] = []
value v (d:ds)
| v == d = [cmatch [d] ds]
| otherwise = []
-- | 'value x' matches input's head if it is not 'x'
notValue :: (Eq a) => a -> Pattern a
notValue _ [] = []
notValue v (d:ds)
| v /= d = [cmatch [d] ds]
| otherwise = []
-- | 'range a b' matches input's head if it lies within [a; b]
range :: (Ord a) => a -> a -> Pattern a
range _ _ [] = []
range a b (d:ds)
| d >= a && d <= b = [cmatch [d] ds]
| otherwise = []
-- | 'anyOf l' matches input's head if it is an element of 'l'
anyOf :: (Eq a) => [a] -> Pattern a
anyOf _ [] = []
anyOf l (d:ds)
| elem d l = [cmatch [d] ds]
| otherwise = []
-- | 'noneOf l' matches input's head if it is not an element of 'l'
noneOf :: (Eq a) => [a] -> Pattern a
noneOf _ [] = []
noneOf l (d:ds)
| notElem d l = [cmatch [d] ds]
| otherwise = []
-- | 'lambda l' matches input's head, if 'l' returns True for it
lambda :: (a -> Bool) -> Pattern a
lambda _ [] = []
lambda l (d:ds)
| l d = [cmatch [d] ds]
| otherwise = []
-- | 'list m' matches m, if the input begins with it
list :: (Eq a) => [a] -> Pattern a
list [] d = [cmatch [] d]
list _ [] = []
list (v:vs) (d:ds)
| v == d = mprepend d $ list vs ds
| otherwise = []
balanced :: (Eq a) => a -> a -> Pattern a
balanced a b (d:ds)
| a == d = mprepend d $ balance 1 a b ds
| otherwise = []
where
balance 0 _ _ ds = [cmatch [] ds]
balance _ _ _ [] = []
balance l a b (d:ds) = mprepend d $ balance (offset d a b l) a b ds
offset d a b l
| d == a = l + 1
| d == b = l - 1
| otherwise = l
-- -----------------------------------------------------------------------------
-- Useful String-wise patterns
numeric = range '0' '9'
hexadecimal = numeric `orElse` range 'a' 'f' `orElse` range 'A' 'F'
lowercase = range 'a' 'z'
uppercase = range 'A' 'Z'
alpha = lowercase `orElse` uppercase
alphanumeric = alpha `orElse` numeric `orElse` anyOf "_-"
ascii = range ' ' '~'
whitespace = anyOf "\t\n\v\f\r "
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment