Skip to content

Instantly share code, notes, and snippets.

@thomasballinger
Created August 12, 2015 20:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thomasballinger/a45a94dd4cee534d3033 to your computer and use it in GitHub Desktop.
Save thomasballinger/a45a94dd4cee534d3033 to your computer and use it in GitHub Desktop.
import re
def tokenize(s):
"""
>>> tokenize("(1.1 + 2 * 3)")
['(', '1.1', '+', '2', '*', '3', ')']
"""
return re.findall("[0123456789.a-zA-Z]+|[*+/\-()=]", s)
"""
Statement -> Print | Goto | Assignment
Print -> PRINT Expr
Expr -> BinOpExpr | Number | Identifier
Assign -> Ident '=' Expr
BinOpExpr -> Ident Op Expr
Num -> \d+
Ident -> \w+
Op -> [+-*/]
10 PRINT HI
20 GOTO 10
30 A = 2 + 3 - 4
parse functions take a string and an index
and return a node or None and an index
"""
def parseStatement(s, i):
"""
>>> toks = tokenize('A = 2 + 3 - 4')
>>> toks
['A', '=', '2', '+', '3', '-', '4']
>>> parseStatement(toks, 0)
(['=', 'A', ['+', 2, ['-', 3, 4]]], 7)
"""
node, i = parsePrint(s, i)
if node is not None:
return node, i
node, i = parseGoto(s, i)
if node is not None:
return node, i
node, i = parseAssignment(s, i)
if node is not None:
return node, i
return None, i
def parseAssignment(s, orig_i):
"""
>>> parseAssignment(['A', '='], 0)
(None, 0)
>>> parseAssignment(['A', '=', '1'], 0)
(['=', 'A', 1], 3)
"""
i = orig_i
if len(s) - i < 3:
return None, orig_i
ident, i = parseIdentifier(s, i)
if ident is None:
return None, orig_i
if s[i] != '=':
return None, orig_i
expr, i = parseExpression(s, i+1)
if expr is None:
return None, orig_i
return ['=', ident, expr], i
def parseIdentifier(s, i):
"""
>>> parseIdentifier(['asdf'], 0)
('asdf', 1)
"""
if re.match(r'\w+', s[i]):
return s[i], i+1
return None, i
def parseExpression(s, i):
"""
>>> parseExpression(['asdf'], 0)
('asdf', 1)
>>> parseExpression(['123'], 0)
(123, 1)
>>> parseExpression(['123', '+', '45'], 0)
(['+', 123, 45], 3)
"""
binop, i = parseBinOpExpr(s, i)
if binop:
return binop, i
thing, i = parseNumOrIdent(s, i)
if thing is not None:
return thing, i
return None, i
def parseNumOrIdent(s, i):
"""
>>> parseNumOrIdent(['123'], 0)
(123, 1)
"""
num, i = parseNum(s, i)
if num is not None:
return num, i
ident, i = parseIdentifier(s, i)
if ident:
return ident, i
return None, i
def parseBinOpExpr(s, orig_i):
"""
>>> parseBinOpExpr(['hi', '+', '3'], 0)
(['+', 'hi', 3], 3)
"""
if len(s) - orig_i < 3:
return None, orig_i
i = orig_i
thing, i = parseNumOrIdent(s, i)
if thing is None:
return None, orig_i
op, i = parseBinOp(s, i)
if op is None:
return None, orig_i
expr, i = parseExpression(s, i)
if expr is None:
return None, orig_i
return [op, thing, expr], i
def parseBinOp(s, i):
"""
>>> parseBinOp(['there', '+', 'hi'], 1)
('+', 2)
"""
if re.match(r'[+-/*]', s[i]):
return s[i], i+1
return None, i
def parseNum(s, i):
"""
>>> parseNum(['123', 'qwerty'], 0)
(123, 1)
"""
if re.match(r'\d+', s[i]):
return int(s[i]), i+1
return None, i
def parsePrint(s, i):
"""
>>> parsePrint(["PRINT", 1], 0)
('print', 1)
"""
if s[i].lower() == 'print':
return 'print', i+1
return None, i
def parseGoto(s, i):
"""
>>> parseGoto(["GOTO", 1], 0)
('goto', 1)
"""
if s[i].lower() == 'goto':
return 'goto', i+1
return None, i
if __name__ == '__main__':
import doctest
doctest.testmod()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment