Skip to content

Instantly share code, notes, and snippets.

@matthieubulte
Created December 26, 2014 23:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save matthieubulte/089c384d2112e1df9965 to your computer and use it in GitHub Desktop.
Save matthieubulte/089c384d2112e1df9965 to your computer and use it in GitHub Desktop.
# flatten :: [[a]] -> [a]
def flatten(l):
return [x for lx in l for x in lx]
# first :: [a] -> a
def first(l):
return [l[0]] if l else []
# toString :: [Char] -> String
def toString(l):
return ''.join(l)
# type Parser a = String -> [(a, String)]
# the parser is a function returning:
# + the empty list if it failed
# + a list of tuples of the successful parsed
# value, and the unconsumed rest of the string
# result is a parser that alwys return the same
# output without consuming the input.
#
# result :: a -> Parser a
def result(x):
return lambda input: [(x, input)]
# zero is the parser that always fails.
#
# zero :: Parser a
zero = lambda input: []
# item consumes one character or fails if the input is
# empty.
#
# id :: Parser Char
def id(input):
if not input:
return []
else:
return [(input[0], input[1:])]
# the seq parser combinator represents the juxtaposition
# of two parsers, or the AND operator.
#
# seq :: Parser a -> Parser b -> Parser (a, b)
def seq(p, q):
return bind(p, lambda first:
bind(q, lambda second:
result((first, second))))
# apply a transformformation to the result of a `seq`.
#
# mapSeq :: Parser a -> Parser b -> (a -> b -> c) -> Parser c
def mapSeq(p, q, f):
return bind(seq(p, q), lambda res:
result(f(res[0], res[1])))
# fmap :: (a -> b) -> Parser a -> Parser b
def fmap(f, parser):
return bind(parser, lambda x: result(f(x)))
# seq can bring problems when composing several parsers
# by having nested tuple as results, thus we introduce the
# bind combinator to solve this problem.
#
# bind :: Parser a -> (a -> Parser b) -> Parser b
def bind(p, f):
def _parser(input):
return flatten([
f(p_res[0])(p_res[1])
for p_res in p(input)
])
return _parser
# sat creates a parser consuming one character
# satisfying the given property.
#
# sat :: (Char -> Bool) -> Parser Char
def sat(p):
return bind(id, lambda x:
result([x]) if p(x) else zero)
# plus combines the result of two parsers on the same input.
# It can be thought as the OR operator.
#
# plus :: Parser a -> Parser a -> Parser a
def plus(p, q):
return lambda input: p(input) + q(input)
# many consumes the input until the given parser fails.
# Can be thought as the * operator.
#
# many :: Parser a -> Parser[a]
def many(p):
_many = bind(p, lambda x:
bind(many(p), lambda xs:
result(x+xs)))
_parser = plus(_many, result([]))
return lambda input: first(_parser(input))
# many1 consumes one or more char from the input until
# the given parser fails.
# Can be thought as the + operator.
#
# many1 :: Parser a -> Parser [a]
def many1(p):
return bind(p, lambda x:
bind(many(p), lambda xs:
result(x + xs)))
# char creates a parser that consumes one char if the first
# char of the parser'S input is the same than the char's
# given parameter.
#
# char :: Char -> Parser Char
def char(c):
return sat(lambda x: x == c)
# digit is a parser consuming one char of the input if it's
# a digit.
#
# digit :: Parser Char
digit = sat(lambda x: x.isdigit())
# lower is a parser consuming one char of the input if it's
# a lower case letter.
#
# lower :: Parser Char
lower = sat(lambda x: 'a' <= x <= 'z')
# upper is a parser consuming one char of the input if it's
# an upper case letter.
#
# upper :: Parser Char
upper = sat(lambda x: 'A' <= x <= 'Z')
# upper is a parser consuming one char of the input if it's
# an upper or lower case letter.
#
# letter :: Parser Char
letter = plus(lower, upper)
# alphaNum is a parser consuming one char if it's a number
# or a letter.
#
# alphaNum :: Parser Char
alphaNum = plus(letter, digit)
# junk :: Parser String
junk = many(sat(lambda c: c == ' ' or c == '\n' or c == '\t'))
# token transform a parser to skip junk before applying
# the given parser.
#
# token :: Parser a -> Parser a
def token(p):
return mapSeq(junk, p, lambda _, x: toString(x))
# nat parses a natural number.
#
# natural :: Parser Nat
natural = bind(token(many1(digit)), lambda x:
result(int(x)))
# identifier is a parser for an identifier, starting with
# a letter, followed by zero or more letters or digits.
#
# identifier :: Parser String
identifier = fmap(toString, token(mapSeq(letter, many(alphaNum), lambda x,y: x+y)))
def lazy(p_factory):
return lambda input: p_factory()(input)
### LISP
lpar = token(char('('))
rpar = token(char(')'))
number = fmap(lambda x: [('NUMBER', x)], natural)
symbol = fmap(lambda x: [('SYMBOL', x)], identifier)
atom = fmap(lambda x: x, plus(symbol, number))
def in_parenthesis(parser):
return bind(lpar, lambda _:
bind(parser, lambda res:
bind(rpar, lambda _:
result(res))))
def _list():
_parser = in_parenthesis(many1(lazy(_s_expr)))
return fmap(lambda x: [('LIST', x)], _parser)
def _s_expr():
s_expr = lazy(_s_expr)
list = lazy(_list)
return plus(
atom,
plus(list,
in_parenthesis(
bind(s_expr, lambda s1:
bind(token(char('.')), lambda _:
bind(s_expr, lambda s2:
result([('CONS', s1 + s2)]))))
))
)
s_expr = _s_expr()
print s_expr('(abc def (123 h) (abc . def))')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment