Skip to content

Instantly share code, notes, and snippets.

@dahlia
Created September 2, 2011 13:42
Show Gist options
  • Save dahlia/1188614 to your computer and use it in GitHub Desktop.
Save dahlia/1188614 to your computer and use it in GitHub Desktop.
Lisp Parser
""":mod:`lispparser` --- Lisp parser
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
"""
# Package metadata.  The author address is written as three adjacent string
# literals, which Python joins into a single string at compile time
# (presumably so the plain address never appears verbatim in the source --
# assumption, not confirmed by anything in this file).
__author__ = 'Hong Minhe <minhee' '@' 'dahlia.kr>'
__license__ = 'Public Domain'
""":mod:`lispparser.exc` --- Parsing errors
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
"""
import exceptions
class SyntaxError(exceptions.SyntaxError):
    """Base class for the errors raised while lexing or parsing Lisp code.

    Subclasses are expected to provide two attributes (or properties):

    ``code``
        the complete source string that was being processed

    ``offset``
        the zero-based character offset into ``code`` at which the error
        was detected

    Everything below is derived from those two values.
    """

    @property
    def column(self):
        """Zero-based column of :attr:`offset` within its own line."""
        # str.rfind() returns -1 when no newline precedes the offset, so one
        # expression covers both cases: on the first line the result is
        # simply ``offset``; on later lines the newline character itself is
        # excluded from the count.
        return self.offset - self.code.rfind('\n', 0, self.offset) - 1

    @property
    def line(self):
        """Zero-based index of the line that contains :attr:`offset`."""
        return self.code.count('\n', 0, self.offset)

    @property
    def offset_indicator(self):
        """The offending source line, then a line with ``^`` under the error."""
        # Split on '\n' only -- the same delimiter `line` counts -- so the
        # index is always in range, even when the offset sits just past a
        # trailing newline (splitlines() would drop that last empty line).
        # A '\r' left over from a CRLF line ending is not part of the text.
        line = self.code.split('\n')[self.line].rstrip('\r')
        return '\n'.join([line, ' ' * self.column + '^'])

    def print_syntax_error(self):
        """Write a three-line report (message, position, caret) to stderr."""
        from sys import stderr
        stderr.write(type(self).__name__ + ': ' + str(self) + '\n')
        # Positions are shown one-based, the way editors number them.
        line = self.line + 1
        col = self.column + 1
        stderr.write('Lexing error({0}:{1}):'.format(line, col) + '\n')
        stderr.write('{0.offset_indicator}'.format(self) + '\n')
class LexingError(SyntaxError):
    """Error raised by the tokenizer for text it cannot turn into a token.

    :param code: the full source string that was being tokenized
    :param offset: character offset into ``code`` where lexing failed
    :param message: optional human-readable description
    """

    __slots__ = ('code', 'offset')

    def __init__(self, code, offset, message=None):
        super(LexingError, self).__init__(message)
        self.offset = offset
        self.code = code
class ParsingError(SyntaxError):
    """Error raised by the parser about one specific token.

    The source text and position are not stored on the error itself; both
    are looked up on the offending token whenever they are requested.

    :param token: the token the parser could not handle; it must expose
                  ``code`` and ``offset`` attributes
    :param message: optional human-readable description
    """

    __slots__ = ('token',)

    def __init__(self, token, message=None):
        super(ParsingError, self).__init__(message)
        self.token = token

    @property
    def offset(self):
        """Character offset of the offending token within :attr:`code`."""
        return self.token.offset

    @property
    def code(self):
        """The complete source string the offending token came from."""
        return self.token.code
class UnopenedParenthesisError(ParsingError):
    """Error for a closing bracket that has no list open to close.

    :param token: the stray closing token
    :param offset: stored unchanged as :attr:`parsing_offset`; the parser
                   passes the token's index within the token list it was
                   iterating over
    :param message: optional human-readable description
    """

    def __init__(self, token, offset, message=None):
        super(UnopenedParenthesisError, self).__init__(token, message)
        self.parsing_offset = offset
""":mod:`lispparser.lexer` --- Lisp tokenizer
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
"""
import re
import collections
import lispparser.exc
# One named alternative per token type.  The pattern is compiled with
# re.VERBOSE, so whitespace inside it is insignificant.  Exactly one named
# group takes part in any match, which lets tokenize() read the token type
# straight from ``match.lastgroup``.
TOKEN_RE = re.compile(r'''
(?P<open> [({] | \[ ) |
(?P<close> [)}] | \] ) |
(?P<number> \d+(?:\.\d+)? ) |
(?P<string> " (?: [^\\"] | \\ . )* " | ' (?: [^\\'] | \\ . )* ' ) |
(?P<symbol> [-_A-Za-z+*/%?!<>=][-_A-Za-z0-9+*/%?!<>=]* )
''', re.VERBOSE)

# A lexed token: ``type`` is the name of the TOKEN_RE group that matched,
# ``token`` the matched text, ``code`` the whole source string and ``offset``
# the index in ``code`` where the token starts.
Token = collections.namedtuple('Token', 'type token code offset')


def _check_gap(code, start, end):
    """Raise LexingError unless ``code[start:end]`` is only whitespace."""
    gap = code[start:end]
    remainder = gap.lstrip()
    if remainder:
        # Report the first offending character, not the whitespace before it.
        bad_offset = start + len(gap) - len(remainder)
        raise lispparser.exc.LexingError(code, bad_offset, 'unexpected token')


def tokenize(code):
    """Lazily split Lisp source text into :class:`Token` tuples.

    :param code: the source string to tokenize
    :returns: a generator of :class:`Token` in source order
    :raises lispparser.exc.LexingError: when anything other than whitespace
        appears between two tokens, before the first one, or after the last
        one; the error's offset is that of the first offending character
    """
    consumed = 0  # end offset of the previous token
    for match in TOKEN_RE.finditer(code):
        begin = match.start()
        _check_gap(code, consumed, begin)
        consumed = match.end()
        yield Token(match.lastgroup, match.group(), code, begin)
    # finditer() just stops when nothing more matches, so whatever follows
    # the last token has to be validated explicitly.
    _check_gap(code, consumed, len(code))
if __name__ == '__main__':
    # Script mode: tokenize standard input and list one token per line, or
    # report the first lexing error on stderr.
    import sys
    source = sys.stdin.read()
    try:
        lexed = list(tokenize(source))
    except lispparser.exc.LexingError as error:
        error.print_syntax_error()
    else:
        for tok in lexed:
            sys.stdout.write('{0.type}({0.offset}): {0.token}'.format(tok) + '\n')
""":mod:`lispparser.parser` --- Lisp parser
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
"""
import lispparser.lexer
import lispparser.exc
class Symbol(object):
    """A Lisp symbol, as produced by :func:`parse` for ``symbol`` tokens.

    Two symbols with the same name compare equal and hash alike, so parsed
    trees can be compared with ``==`` and symbols can be used as dictionary
    keys or set members.

    :param symbol: the symbol's name
    """

    __slots__ = 'symbol',

    def __init__(self, symbol):
        self.symbol = symbol

    def __eq__(self, other):
        if isinstance(other, Symbol):
            return self.symbol == other.symbol
        return NotImplemented

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out.
        equal = self.__eq__(other)
        return equal if equal is NotImplemented else not equal

    def __hash__(self):
        # Mix in the class so a symbol does not hash like its bare name.
        return hash((Symbol, self.symbol))

    def __str__(self):
        return str(self.symbol)

    def __unicode__(self):
        return unicode(self.symbol)

    def __repr__(self):
        return 'Symbol({0!r})'.format(self.symbol)
def parse(tokens):
    """Parse Lisp source text or tokens into nested Python values.

    :param tokens: either source text, which is first passed through
                   :func:`lispparser.lexer.tokenize`, or an iterable of
                   ``(type, token, code, offset)`` tokens
    :returns: a generator yielding one value per top-level form.  Lists
              become :class:`list`, numbers :class:`int` or :class:`float`
              (when they contain a ``.``), strings their Python string
              value and symbols :class:`Symbol` instances.
    :raises lispparser.exc.UnopenedParenthesisError: on a closing bracket
        while no list is open; its ``parsing_offset`` is the index of that
        token in the token sequence
    :raises lispparser.exc.ParsingError: when the input ends while a list is
        still open, or when a token has an unknown type

    All tokens are collected before the first form is produced, so lexing
    errors surface before anything is yielded.
    """
    import ast

    try:
        text_types = basestring  # Python 2: covers str and unicode
    except NameError:
        text_types = str
    if isinstance(tokens, text_types):
        tokens = lispparser.lexer.tokenize(tokens)
    tokens = list(tokens)

    # Lists that have been opened but not closed yet, innermost last.  Each
    # entry pairs the opening token (kept for error reporting) with the
    # elements collected so far.
    open_lists = []
    for index, token in enumerate(tokens):
        kind, text, _code, _offset = token
        if kind == 'open':
            open_lists.append((token, []))
            continue
        if kind == 'close':
            if not open_lists:
                raise lispparser.exc.UnopenedParenthesisError(
                    token, index,
                    'expected opened list'
                )
            form = open_lists.pop()[1]
        elif kind == 'number':
            form = float(text) if '.' in text else int(text)
        elif kind == 'symbol':
            form = Symbol(text)
        elif kind == 'string':
            # The lexer only emits quoted string literals here.  literal_eval
            # decodes them exactly as eval() did, but a token handed in by a
            # caller can never execute code.
            form = ast.literal_eval(text)
        else:
            raise lispparser.exc.ParsingError(token, 'unexpected token type')
        # A finished form belongs to the innermost open list, or -- when
        # nothing is open -- is a top-level result.
        if open_lists:
            open_lists[-1][1].append(form)
        else:
            yield form
    if open_lists:
        # Input ended inside a list: report the innermost unclosed opener
        # rather than silently emitting a partial tree.
        raise lispparser.exc.ParsingError(
            open_lists[-1][0],
            'expected closing parenthesis'
        )
if __name__ == '__main__':
    # Script mode: parse standard input and pretty-print every top-level
    # form, or report the first syntax error on stderr.
    import pprint
    import sys
    source = sys.stdin.read()
    try:
        forms = list(parse(source))
    except lispparser.exc.SyntaxError as error:
        error.print_syntax_error()
    else:
        for top_level_form in forms:
            pprint.pprint(top_level_form)
{define (factorial n)
  {if [> n 1]
      (* n (factorial (- n 1)))
      1}}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment