Skip to content

Instantly share code, notes, and snippets.

@twitu
Last active March 18, 2020 14:37
Show Gist options
  • Save twitu/c77e6ad6f4c4b8e1d2cb86139a1ef3a1 to your computer and use it in GitHub Desktop.
Save twitu/c77e6ad6f4c4b8e1d2cb86139a1ef3a1 to your computer and use it in GitHub Desktop.
Functional Parsers implementation
-- Parser.hs
module Parser
( token
)
where
import Prelude hiding ( (<*>)
, fail
, (*>)
, (<*)
, sequence
)
import Data.Char ( isDigit
, ord
, isAlpha
, isUpper
, isLower
, isAlphaNum
)
-- Parser definition
-- Returns a list of possible remaining symbols and parsed results, including an
-- empty list
type Parser symbol result = [symbol] -> [([symbol], result)]
-- Elementary parsers
-- parse a single symbol
symbol :: Eq s => s -> Parser s s
symbol a [] = []
symbol a (x : xs) | a == x = [(xs, x)]
| otherwise = []
-- parse a token (a list of symbols)
token :: Eq s => [s] -> Parser s [s]
token k xs | k == take n xs = [(drop n xs, k)]
| otherwise = []
where n = length k
-- exercise 1
satisfy :: (s -> Bool) -> Parser s s
satisfy p [] = []
satisfy p (x : xs) = [ (xs, x) | p x ]
symbol' a = satisfy (a ==)
-- trivial parsers
-- parses empty string
epsilon :: Parser s ()
epsilon xs = [(xs, ())]
-- always returns a fixed value
succeed :: r -> Parser s r
succeed v xs = [(xs, v)]
-- redeclaration
epsilon' = succeed ()
-- always fails
fail :: Parser s r
fail xs = []
-- Parser combinators
infixr 6 <*>
infixr 4 <|>
-- concatenates the operation of two parsers
-- returns a list of all combinations results when the two parser are applied in sequence
(<*>) :: Parser s a -> Parser s b -> Parser s (a, b)
(p1 <*> p2) xs = [ (xs2, (v1, v2)) | (xs1, v1) <- p1 xs, (xs2, v2) <- p2 xs1 ]
-- represents choice where both the result of both the parsers are returned
-- NOTE: That both must return the same type of result since the list of results must be homogenous
(<|>) :: Parser s a -> Parser s a -> Parser s a
(p1 <|> p2) xs = p1 xs ++ p2 xs
-- Parser transformers
-- drops all spaces before parsing a string
sp :: Parser Char a -> Parser Char a
sp p = p . dropWhile (== ' ')
-- filters parser results for those that are (), i.e. they have completely consumed the input
-- NOTE: This can also return an empty list because no parser has entirely consumed the input
just :: Parser s a -> Parser s a
just p = filter (null . fst) . p
infixr 5 <@
-- the most important transformer as it applies a function to the result of a parser
(<@) :: Parser s a -> (a -> b) -> Parser s b
(p <@ f) xs = [ (ys, f v) | (ys, v) <- p xs ]
-- Exercise 3
just' :: Parser s a -> Parser s a
just' p xs = [ v | v <- p xs, null . fst $ v ]
------------------------------------------------
-- simple applications for transformations
digit :: Parser Char Int
digit = satisfy isDigit <@ f where f c = ord c - ord '0'
-- convenient type for parsing a string and getting the result
-- NOTE: will throw error if not complete parsing exists
type DetPars symbol result = [symbol] -> result
some :: Parser s a -> DetPars s a
some p = snd . head . just p
-- TODO: add examples here
data Tree = Nil | Bin (Tree, Tree) deriving (Show)
-- parses the recursive grammar for parsing parenthesis
-- parens = (parens) parens | Nil
-- this into a tree like structure using the Tree data type
parens :: Parser Char Tree
parens =
(symbol '(' <*> parens <*> symbol ')' <*> parens)
<@ (\(_, (x, (_, y))) -> Bin (x, y))
<|> epsilon
<@ const Nil
-- a verbose but clearer defintion of parens, by writing the lambda as a separate function
parens' :: Parser Char Tree
parens' =
(symbol '(' <*> parens' <*> symbol ')' <*> parens')
<@ f
<|> epsilon
<@ const Nil
where f ('(', (x, (')', y))) = Bin (x, y)
-- Exercise 5
-- epsilon <@ const Nil can be written as succeed Nil
-- because const takes two arguments and ignore whatever the second argument is
-- returning Nil. The behaviour will be exactly similar to succeed Nil which will return
-- Nil whatever be the input
parens'' :: Parser Char Tree
parens'' =
(symbol '(' <*> parens'' <*> symbol ')' <*> parens'') <@ f <|> succeed Nil
where f ('(', (x, (')', y))) = Bin (x, y)
-------------------------------------------------
infixr 6 <*
infixr 6 *>
-- concatenates two parsers but only takes the result of the first parser
(<*) :: Parser s a -> Parser s b -> Parser s a
p1 <* p2 = p1 <*> p2 <@ fst
-- concatenates two parsers but only takes the result of the second parser
(*>) :: Parser s a -> Parser s b -> Parser s b
p1 *> p2 = p1 <*> p2 <@ snd
open = symbol '('
close = symbol ')'
-- rewriting parens
parenths :: Parser Char Tree
parenths = (open *> parens <* close) <*> parens <@ Bin <|> succeed Nil
-- Exercise 7
-- counts the maximum depth of nesting in a balanced parentheses string
nesting :: Parser Char Int
nesting = (open *> nesting <* close) <*> nesting <@ f <|> succeed 0
where f (x, y) = (1 + x) `max` y
-- higher order function for folding values across nested parentheses
-- by taking a starting value and a combine operation
foldparens :: ((a, a) -> a) -> a -> Parser Char a
foldparens f e = p where p = (open *> p <* close) <*> p <@ f <|> succeed e
-- implementing nesting and parens in terms of fold parens
parens''' = foldparens Bin Nil
nesting' = foldparens (\(x, y) -> (1 + x) `max` y) 0
---------------------------------------------------
-- more parser combinators
-- uncurried version of list acts as a helper function
listp :: (a, [a]) -> [a]
listp (x, xs) = x : xs
listp' :: (a, [a]) -> [a]
listp' = uncurry (:)
-- concatenates the result of the same parser again and again
many' :: Parser s a -> Parser s [a]
many' p = p <*> many p <@ listp <|> succeed []
-- Exercise 9
-- makes a special helper combinators to combine the results of two parsers as a list
-- However the the two parsers must be related in the types of their result, only then
-- can the list operator be applied on them
(<:*>) :: Parser s a -> Parser s [a] -> Parser s [a]
p1 <:*> p2 = p1 <*> p2 <@ listp
-------------------------------------
-- Exercise 10
-- redefining many with the new transformation
many :: Parser s a -> Parser s [a]
many p = p <:*> many p <|> succeed []
---------------------------------------------------
-- Examples
-- calculate the value of natural number while parsing
-- NOTE: the helper function can also be written as ((+).(10*))
natural :: Parser Char Int
natural = many digit <@ foldl f 0 where f x y = x * 10 + y
-- Exercise 11
-- many1 parser accepts 1 or more results and concatenates them as a list
many1 :: Parser s a -> Parser s [a]
many1 p = p <:*> many p <|> p <@ (: [])
---------------------------------------------------
-- represents a choice for the parser to parse or not to parse
-- returns both results so that further parsing can continue along both paths
option :: Parser s a -> Parser s [a]
option p = p <@ (: []) <|> succeed []
-- Parsing strings while ignoring beginning and ending tokens
-- along with some common case examples
pack :: Parser s a -> Parser s b -> Parser s c -> Parser s b
pack s1 p s2 = s1 *> p <* s2
parenthesized p = pack (symbol '(') p (symbol ')')
bracketed p = pack (symbol '[') p (symbol ']')
compound p = pack (token "begin") p (token "end")
-- computes a list of result while ignoring the delimiter separating them
-- the many parser takes a parser that ignores the delimiter and keeps the next element
-- along with some common examples
listOf :: Parser s a -> Parser s b -> Parser s [a]
listOf p delim = p <:*> many (delim *> p) <|> succeed []
commaList, semicList :: Parser Char a -> Parser Char [a]
commaList p = listOf p (symbol ',')
semicList p = listOf p (symbol ';')
-- Exercise 12
-- converts a list of parsers to a parser that returns a list
sequence :: [Parser s a] -> Parser s [a]
sequence = foldr (<:*>) (succeed [])
{- HLINT ignore sequence' -}
-- a bit verbose and readable definition
sequence' :: [Parser s a] -> Parser s [a]
sequence' [] = succeed []
sequence' (x : xs) = x <:*> (sequence' xs)
-- combinator that evaluates all possible outcomes of a list of parsers
-- it fails if no parser is successful in parsing the input
choice :: [Parser s a] -> Parser s a
choice = foldr (<|>) fail
----------------------------------------------------------------
-- Exercise 13
-- define token in terms of sequence
token' :: String -> Parser Char String
token' word = sequence (map symbol word)
----------------------------------------------------------------
-- helper function for applying arithmetic operations
ap2 (op, y) = (`op` y)
-- chainl does not drop the delimiter
-- it also applies a function to process the results along with any
-- meaning attached to the delimiter. This means that function f
-- must work on two arguments like a (`op` b)
chainl :: Parser s a -> Parser s (a -> a -> a) -> Parser s a
-- chainl p s = p <*> many (s <*> p) <@ f
chainl p s = p <*> many (s <*> p) <@ uncurry (foldl (flip ap2))
-- Exercise 14
ap1 (x, op) = (x `op`)
chainr :: Parser s a -> Parser s (a -> a -> a) -> Parser s a
chainr p s = many (p <*> s) <*> p <@ uncurry (flip (foldr ap1))
-------------------------------------------
-- Working with options
-- an option parser returns no result or one result in a list
-- a handly option operator takes two functions to operate on both cases
p <?@ (no, yes) = p <@ f
where
f [] = no
f [x] = yes x
-- examples for working with natural and floating point numbers
fract :: Parser Char Float
fract = many digit <@ foldr f 0.0 where f d x = (x + fromIntegral d) / 10.0
-- handling the fixed part of a floating point number
fixed :: Parser Char Float
fixed =
(integer <@ fromIntegral)
<*> (option (symbol '.' *> fract) <?@ (0.0, id))
<@ uncurry (+)
-- Exercise 15
integer :: Parser Char Int
integer = option (symbol '-') <*> natural <@ f
where
f ([], n) = n
f (_ , n) = -n -- this case will always have '-' as first argument
-- rewriting using optional combinator
integer' :: Parser Char Int
integer' = option (symbol '-') <?@ (id, const negate) <*> natural <@ ap
where ap (f, x) = f x
-----------------------------------------------------------
-- Exercise 16
-- floating point numbers with optional exponents
-- example: 1.23E-3 is equivalent to 0.00123
float :: Parser Char Float
float = fixed <*> (option (symbol 'E' *> integer) <?@ (0, id)) <@ f
where
f (val, pow) | pow < 0 = val * (1 / 10 ^ (-pow))
| otherwise = val * 10 ^ pow
-----------------------------------------------------------
-- rewriting chainl and chainr using the option parser
-- chainr' :: Parser s a -> Parser s (a -> a -> a) -> Parser s a
-- chainr' p s = q
-- where q = p <*> (option (s <*> q) <?@ (id, ap1)) <@ flip (\f x -> f x)
-- transformations to reduce backtracking
-- take only the first result of a parser
-- so as to avoid computing unnecessary results
first :: Parser a b -> Parser a b
first p xs | null r = []
| otherwise = [head r]
where r = p xs
-- greedy versions of many and many1 that try to take
-- parse the maxium number of input characters possible
-- this works because many and many1 are defined so as to
-- try to parse as many characters as possible before
-- backtracking
greedy = first . many
greedy1 = first . many1
-- using greedy for a more efficient listOf
listOf' :: Parser s a -> Parser s b -> Parser s [a]
listOf' p delim = p <:*> greedy (delim *> p) <|> succeed []
-- the next combinator takes the option if it exists
-- but does not fail if it doesn't exist
compulsion = first . option
-- Applying parsers on a grammar
data Expr = Con Int
| Var String
| Fun String [Expr]
| Expr :+: Expr
| Expr :-: Expr
| Expr :*: Expr
| Expr :/: Expr
deriving (Show)
-- helper
identifier :: Parser Char String
identifier = many (satisfy isAlpha)
term' :: Parser Char Expr
term' = chainr fact (symbol '*' <@ const (:*:) <|> symbol '/' <@ const (:/:))
expr' :: Parser Char Expr
expr' = chainr term' (symbol '+' <@ const (:+:) <|> symbol '-' <@ const (:-:))
ap' :: (String, String -> Expr) -> Expr
ap' (x, f) = f x
-- parse a fact keeping the precedence of operators in mind
fact :: Parser Char Expr
fact =
integer
<@ Con
<|> identifier
<*> (option (parenthesized (commaList expr)) <?@ (Var, flip Fun))
<@ ap'
<|> parenthesized expr
-- generalizing application of operators
type Op a = (Char, a -> a -> a)
gen :: [Op a] -> Parser Char a -> Parser Char a
gen ops p = chainr p (choice (map f ops)) where f (s, c) = symbol s <@ const c
multis = [('*', (:*:)), ('/', (:/:))]
addis = [('+', (:+:)), ('-', (:-:))]
-- we redefine expr and term as a list of operations
-- chained on expressions in this case the list of
-- operations are contained in a list and applied in
-- order of precedence
expr'' = gen addis term''
term'' = gen multis fact
-- redefining
expr''' = addis `gen` (multis `gen` fact)
-- and finally generalizing expr for more possible levels of operations
expr = foldr gen fact [addis, multis]
@twitu
Copy link
Author

twitu commented Mar 18, 2020

This is an implementation of the classic Functional Parsers paper.

  1. Install stack https://docs.haskellstack.org/en/stable/README/#how-to-install
  2. Download the Parser.hs file
  3. Run stack ghci Parser.hs to open a REPL like interface which loads the module
  4. Try out different expressions, some examples are given below

Example expressions

sentence = "You know, Hobbes, some days even my lucky rocket ship underpants don't help."
token "You know," sentence
token "You know, " <*> token "Hobbes" $ sentence
token "You know, " *> token "Hobbes" <@ (\x -> if x == "Hobbes" then "Tiger" else "Lulz") $ sentence

nesting "((()))"
nesting "((()())())"
nesting "((()())())))"

listOf (satisfy isAlpha) (symbol ',') "H,O,B,B,E,S"
listOf' (satisfy isAlpha) (symbol ',') "H,O,B,B,E,S"
some (listOf' (satisfy isAlpha) (symbol ',')) "H,O,B,B,E,S"

natural "419"
some (natural) "419"
some (natural) "419.03"
natural "419.03"
float "419.03"
float "419.03E-6"

arith = [('+', (+)), ('-', (-))]
gen arith digit "2+3-5"

-- expr for fibonacci numbers for sample grammar where last expression is the output
some (expr) "half(a,b,a+b/2)"

satisfy isAlpha <*> greedy (satisfy isAlpha <|> satisfy isDigit <|> symbol '_') <@ listp $ "cAlV1n_nd_H0bbEs"

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment