Last active
March 18, 2020 14:37
-
-
Save twitu/c77e6ad6f4c4b8e1d2cb86139a1ef3a1 to your computer and use it in GitHub Desktop.
Functional Parsers implementation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Parser.hs | |
module Parser | |
( token | |
) | |
where | |
import Prelude hiding ( (<*>) | |
, fail | |
, (*>) | |
, (<*) | |
, sequence | |
) | |
import Data.Char ( isDigit | |
, ord | |
, isAlpha | |
, isUpper | |
, isLower | |
, isAlphaNum | |
) | |
-- Parser definition | |
-- Returns a list of possible remaining symbols and parsed results, including an | |
-- empty list | |
type Parser symbol result = [symbol] -> [([symbol], result)] | |
-- Elementary parsers | |
-- parse a single symbol | |
symbol :: Eq s => s -> Parser s s | |
symbol a [] = [] | |
symbol a (x : xs) | a == x = [(xs, x)] | |
| otherwise = [] | |
-- parse a token (a list of symbols) | |
token :: Eq s => [s] -> Parser s [s] | |
token k xs | k == take n xs = [(drop n xs, k)] | |
| otherwise = [] | |
where n = length k | |
-- exercise 1 | |
satisfy :: (s -> Bool) -> Parser s s | |
satisfy p [] = [] | |
satisfy p (x : xs) = [ (xs, x) | p x ] | |
symbol' a = satisfy (a ==) | |
-- trivial parsers | |
-- parses empty string | |
epsilon :: Parser s () | |
epsilon xs = [(xs, ())] | |
-- always returns a fixed value | |
succeed :: r -> Parser s r | |
succeed v xs = [(xs, v)] | |
-- redeclaration | |
epsilon' = succeed () | |
-- always fails | |
fail :: Parser s r | |
fail xs = [] | |
-- Parser combinators | |
infixr 6 <*> | |
infixr 4 <|> | |
-- concatenates the operation of two parsers | |
-- returns a list of all combinations results when the two parser are applied in sequence | |
(<*>) :: Parser s a -> Parser s b -> Parser s (a, b) | |
(p1 <*> p2) xs = [ (xs2, (v1, v2)) | (xs1, v1) <- p1 xs, (xs2, v2) <- p2 xs1 ] | |
-- represents choice where both the result of both the parsers are returned | |
-- NOTE: That both must return the same type of result since the list of results must be homogenous | |
(<|>) :: Parser s a -> Parser s a -> Parser s a | |
(p1 <|> p2) xs = p1 xs ++ p2 xs | |
-- Parser transformers | |
-- drops all spaces before parsing a string | |
sp :: Parser Char a -> Parser Char a | |
sp p = p . dropWhile (== ' ') | |
-- filters parser results for those that are (), i.e. they have completely consumed the input | |
-- NOTE: This can also return an empty list because no parser has entirely consumed the input | |
just :: Parser s a -> Parser s a | |
just p = filter (null . fst) . p | |
infixr 5 <@ | |
-- the most important transformer as it applies a function to the result of a parser | |
(<@) :: Parser s a -> (a -> b) -> Parser s b | |
(p <@ f) xs = [ (ys, f v) | (ys, v) <- p xs ] | |
-- Exercise 3 | |
just' :: Parser s a -> Parser s a | |
just' p xs = [ v | v <- p xs, null . fst $ v ] | |
------------------------------------------------ | |
-- simple applications for transformations | |
digit :: Parser Char Int | |
digit = satisfy isDigit <@ f where f c = ord c - ord '0' | |
-- convenient type for parsing a string and getting the result | |
-- NOTE: will throw error if not complete parsing exists | |
type DetPars symbol result = [symbol] -> result | |
some :: Parser s a -> DetPars s a | |
some p = snd . head . just p | |
-- TODO: add examples here | |
data Tree = Nil | Bin (Tree, Tree) deriving (Show) | |
-- parses the recursive grammar for parsing parenthesis | |
-- parens = (parens) parens | Nil | |
-- this into a tree like structure using the Tree data type | |
parens :: Parser Char Tree | |
parens = | |
(symbol '(' <*> parens <*> symbol ')' <*> parens) | |
<@ (\(_, (x, (_, y))) -> Bin (x, y)) | |
<|> epsilon | |
<@ const Nil | |
-- a verbose but clearer defintion of parens, by writing the lambda as a separate function | |
parens' :: Parser Char Tree | |
parens' = | |
(symbol '(' <*> parens' <*> symbol ')' <*> parens') | |
<@ f | |
<|> epsilon | |
<@ const Nil | |
where f ('(', (x, (')', y))) = Bin (x, y) | |
-- Exercise 5 | |
-- epsilon <@ const Nil can be written as succeed Nil | |
-- because const takes two arguments and ignore whatever the second argument is | |
-- returning Nil. The behaviour will be exactly similar to succeed Nil which will return | |
-- Nil whatever be the input | |
parens'' :: Parser Char Tree | |
parens'' = | |
(symbol '(' <*> parens'' <*> symbol ')' <*> parens'') <@ f <|> succeed Nil | |
where f ('(', (x, (')', y))) = Bin (x, y) | |
------------------------------------------------- | |
infixr 6 <* | |
infixr 6 *> | |
-- concatenates two parsers but only takes the result of the first parser | |
(<*) :: Parser s a -> Parser s b -> Parser s a | |
p1 <* p2 = p1 <*> p2 <@ fst | |
-- concatenates two parsers but only takes the result of the second parser | |
(*>) :: Parser s a -> Parser s b -> Parser s b | |
p1 *> p2 = p1 <*> p2 <@ snd | |
open = symbol '(' | |
close = symbol ')' | |
-- rewriting parens | |
parenths :: Parser Char Tree | |
parenths = (open *> parens <* close) <*> parens <@ Bin <|> succeed Nil | |
-- Exercise 7 | |
-- counts the maximum depth of nesting in a balanced parentheses string | |
nesting :: Parser Char Int | |
nesting = (open *> nesting <* close) <*> nesting <@ f <|> succeed 0 | |
where f (x, y) = (1 + x) `max` y | |
-- higher order function for folding values across nested parentheses | |
-- by taking a starting value and a combine operation | |
foldparens :: ((a, a) -> a) -> a -> Parser Char a | |
foldparens f e = p where p = (open *> p <* close) <*> p <@ f <|> succeed e | |
-- implementing nesting and parens in terms of fold parens | |
parens''' = foldparens Bin Nil | |
nesting' = foldparens (\(x, y) -> (1 + x) `max` y) 0 | |
--------------------------------------------------- | |
-- more parser combinators | |
-- uncurried version of list acts as a helper function | |
listp :: (a, [a]) -> [a] | |
listp (x, xs) = x : xs | |
listp' :: (a, [a]) -> [a] | |
listp' = uncurry (:) | |
-- concatenates the result of the same parser again and again | |
many' :: Parser s a -> Parser s [a] | |
many' p = p <*> many p <@ listp <|> succeed [] | |
-- Exercise 9 | |
-- makes a special helper combinators to combine the results of two parsers as a list | |
-- However the the two parsers must be related in the types of their result, only then | |
-- can the list operator be applied on them | |
(<:*>) :: Parser s a -> Parser s [a] -> Parser s [a] | |
p1 <:*> p2 = p1 <*> p2 <@ listp | |
------------------------------------- | |
-- Exercise 10 | |
-- redefining many with the new transformation | |
many :: Parser s a -> Parser s [a] | |
many p = p <:*> many p <|> succeed [] | |
--------------------------------------------------- | |
-- Examples | |
-- calculate the value of natural number while parsing | |
-- NOTE: the helper function can also be written as ((+).(10*)) | |
natural :: Parser Char Int | |
natural = many digit <@ foldl f 0 where f x y = x * 10 + y | |
-- Exercise 11 | |
-- many1 parser accepts 1 or more results and concatenates them as a list | |
many1 :: Parser s a -> Parser s [a] | |
many1 p = p <:*> many p <|> p <@ (: []) | |
--------------------------------------------------- | |
-- represents a choice for the parser to parse or not to parse | |
-- returns both results so that further parsing can continue along both paths | |
option :: Parser s a -> Parser s [a] | |
option p = p <@ (: []) <|> succeed [] | |
-- Parsing strings while ignoring beginning and ending tokens | |
-- along with some common case examples | |
pack :: Parser s a -> Parser s b -> Parser s c -> Parser s b | |
pack s1 p s2 = s1 *> p <* s2 | |
parenthesized p = pack (symbol '(') p (symbol ')') | |
bracketed p = pack (symbol '[') p (symbol ']') | |
compound p = pack (token "begin") p (token "end") | |
-- computes a list of result while ignoring the delimiter separating them | |
-- the many parser takes a parser that ignores the delimiter and keeps the next element | |
-- along with some common examples | |
listOf :: Parser s a -> Parser s b -> Parser s [a] | |
listOf p delim = p <:*> many (delim *> p) <|> succeed [] | |
commaList, semicList :: Parser Char a -> Parser Char [a] | |
commaList p = listOf p (symbol ',') | |
semicList p = listOf p (symbol ';') | |
-- Exercise 12 | |
-- converts a list of parsers to a parser that returns a list | |
sequence :: [Parser s a] -> Parser s [a] | |
sequence = foldr (<:*>) (succeed []) | |
{- HLINT ignore sequence' -} | |
-- a bit verbose and readable definition | |
sequence' :: [Parser s a] -> Parser s [a] | |
sequence' [] = succeed [] | |
sequence' (x : xs) = x <:*> (sequence' xs) | |
-- combinator that evaluates all possible outcomes of a list of parsers | |
-- it fails if no parser is successful in parsing the input | |
choice :: [Parser s a] -> Parser s a | |
choice = foldr (<|>) fail | |
---------------------------------------------------------------- | |
-- Exercise 13 | |
-- define token in terms of sequence | |
token' :: String -> Parser Char String | |
token' word = sequence (map symbol word) | |
---------------------------------------------------------------- | |
-- helper function for applying arithmetic operations | |
ap2 (op, y) = (`op` y) | |
-- chainl does not drop the delimiter | |
-- it also applies a function to process the results along with any | |
-- meaning attached to the delimiter. This means that function f | |
-- must work on two arguments like a (`op` b) | |
chainl :: Parser s a -> Parser s (a -> a -> a) -> Parser s a | |
-- chainl p s = p <*> many (s <*> p) <@ f | |
chainl p s = p <*> many (s <*> p) <@ uncurry (foldl (flip ap2)) | |
-- Exercise 14 | |
ap1 (x, op) = (x `op`) | |
chainr :: Parser s a -> Parser s (a -> a -> a) -> Parser s a | |
chainr p s = many (p <*> s) <*> p <@ uncurry (flip (foldr ap1)) | |
------------------------------------------- | |
-- Working with options | |
-- an option parser returns no result or one result in a list | |
-- a handly option operator takes two functions to operate on both cases | |
p <?@ (no, yes) = p <@ f | |
where | |
f [] = no | |
f [x] = yes x | |
-- examples for working with natural and floating point numbers | |
fract :: Parser Char Float | |
fract = many digit <@ foldr f 0.0 where f d x = (x + fromIntegral d) / 10.0 | |
-- handling the fixed part of a floating point number | |
fixed :: Parser Char Float | |
fixed = | |
(integer <@ fromIntegral) | |
<*> (option (symbol '.' *> fract) <?@ (0.0, id)) | |
<@ uncurry (+) | |
-- Exercise 15 | |
integer :: Parser Char Int | |
integer = option (symbol '-') <*> natural <@ f | |
where | |
f ([], n) = n | |
f (_ , n) = -n -- this case will always have '-' as first argument | |
-- rewriting using optional combinator | |
integer' :: Parser Char Int | |
integer' = option (symbol '-') <?@ (id, const negate) <*> natural <@ ap | |
where ap (f, x) = f x | |
----------------------------------------------------------- | |
-- Exercise 16 | |
-- floating point numbers with optional exponents | |
-- example: 1.23E-3 is equivalent to 0.00123 | |
float :: Parser Char Float | |
float = fixed <*> (option (symbol 'E' *> integer) <?@ (0, id)) <@ f | |
where | |
f (val, pow) | pow < 0 = val * (1 / 10 ^ (-pow)) | |
| otherwise = val * 10 ^ pow | |
----------------------------------------------------------- | |
-- rewriting chainl and chainr using the option parser | |
-- chainr' :: Parser s a -> Parser s (a -> a -> a) -> Parser s a | |
-- chainr' p s = q | |
-- where q = p <*> (option (s <*> q) <?@ (id, ap1)) <@ flip (\f x -> f x) | |
-- transformations to reduce backtracking | |
-- take only the first result of a parser | |
-- so as to avoid computing unnecessary results | |
first :: Parser a b -> Parser a b | |
first p xs | null r = [] | |
| otherwise = [head r] | |
where r = p xs | |
-- greedy versions of many and many1 that try to take | |
-- parse the maxium number of input characters possible | |
-- this works because many and many1 are defined so as to | |
-- try to parse as many characters as possible before | |
-- backtracking | |
greedy = first . many | |
greedy1 = first . many1 | |
-- using greedy for a more efficient listOf | |
listOf' :: Parser s a -> Parser s b -> Parser s [a] | |
listOf' p delim = p <:*> greedy (delim *> p) <|> succeed [] | |
-- the next combinator takes the option if it exists | |
-- but does not fail if it doesn't exist | |
compulsion = first . option | |
-- Applying parsers on a grammar | |
data Expr = Con Int | |
| Var String | |
| Fun String [Expr] | |
| Expr :+: Expr | |
| Expr :-: Expr | |
| Expr :*: Expr | |
| Expr :/: Expr | |
deriving (Show) | |
-- helper | |
identifier :: Parser Char String | |
identifier = many (satisfy isAlpha) | |
term' :: Parser Char Expr | |
term' = chainr fact (symbol '*' <@ const (:*:) <|> symbol '/' <@ const (:/:)) | |
expr' :: Parser Char Expr | |
expr' = chainr term' (symbol '+' <@ const (:+:) <|> symbol '-' <@ const (:-:)) | |
ap' :: (String, String -> Expr) -> Expr | |
ap' (x, f) = f x | |
-- parse a fact keeping the precedence of operators in mind | |
fact :: Parser Char Expr | |
fact = | |
integer | |
<@ Con | |
<|> identifier | |
<*> (option (parenthesized (commaList expr)) <?@ (Var, flip Fun)) | |
<@ ap' | |
<|> parenthesized expr | |
-- generalizing application of operators | |
type Op a = (Char, a -> a -> a) | |
gen :: [Op a] -> Parser Char a -> Parser Char a | |
gen ops p = chainr p (choice (map f ops)) where f (s, c) = symbol s <@ const c | |
multis = [('*', (:*:)), ('/', (:/:))] | |
addis = [('+', (:+:)), ('-', (:-:))] | |
-- we redefine expr and term as a list of operations | |
-- chained on expressions in this case the list of | |
-- operations are contained in a list and applied in | |
-- order of precedence | |
expr'' = gen addis term'' | |
term'' = gen multis fact | |
-- redefining | |
expr''' = addis `gen` (multis `gen` fact) | |
-- and finally generalizing expr for more possible levels of operations | |
expr = foldr gen fact [addis, multis] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is an implementation of the classic Functional Parsers paper.
Example expressions