Last active
December 17, 2015 05:39
-
-
Save jagt/5559570 to your computer and use it in GitHub Desktop.
LR Parser using PLY
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# LR parser built with PLY (Python Lex-Yacc).
from sys import exit | |
import ply.lex as lex | |
import ply.yacc as yacc | |
from pprint import pprint | |
# Token type names handed to PLY. Each entry has a matching t_<NAME> rule
# (string or function) defined further down in this module.
tokens = (
    'NAME',
    'STRING',
    'NUM',
    'LBRACK',
    'RBRACK',
    'LBRACE',
    'RBRACE',
    'COMMA',
    'EQUAL',
)
# Simple tokens: each t_<TOKEN> string is the regular expression for that token.
t_NAME = r'\w+'  # one or more word characters (letters, digits, underscore)
t_LBRACK = r'\['  # opens a list
t_RBRACK = r'\]'  # closes a list
t_LBRACE = r'{'  # opens a nested record block
t_RBRACE = r'}'  # closes a nested record block
t_COMMA = r','  # separates list items
t_EQUAL = r'='  # separates a record name from its value
# Lexer rule for double-quoted string literals.
#
# NOTE: PLY uses the function docstring as the token regex, so documentation
# for this rule lives in comments, not in the docstring.
#
# The pattern stops at the first closing quote (and never crosses a newline),
# so two strings on the same line, e.g. ["a", "b"], are lexed as two separate
# STRING tokens instead of one greedy match, and the empty string "" is
# accepted. Escaped quotes inside a string are not supported.
#
# t: the PLY token; its value is rewritten to the text between the quotes.
# Returns the same token so PLY emits it.
def t_STRING(t):
    r'"[^"\n]*"'
    t.value = t.value[1:-1]  # strip the surrounding quotes
    return t
# Lexer rule for numeric literals (integers and simple decimals).
#
# NOTE: PLY uses the function docstring as the token regex, so documentation
# for this rule lives in comments, not in the docstring.
#
# The pattern accepts "253", "20.0", "20." and ".5" but at most ONE decimal
# point and at least one digit. Input such as "." or "1.2.3" therefore no
# longer reaches float() as a whole and cannot raise ValueError in the lexer.
#
# t: the PLY token; its value is converted to float when it contains a '.',
#    otherwise to int.
# Returns the same token so PLY emits it.
def t_NUM(t):
    r'\d+(?:\.\d*)?|\.\d+'
    t.value = float(t.value) if '.' in t.value else int(t.value)
    return t
# Characters the lexer skips between tokens: space and tab.
t_ignore = ' \t'
# Lexer rule that consumes runs of newlines and keeps the line counter current.
#
# NOTE: the docstring is the token regex PLY matches against; keep prose in
# comments. Nothing is returned, so no token is emitted for newlines.
def t_newline(t):
    r'\n+'
    # The match consists solely of '\n' characters, so counting them gives the
    # number of lines to advance.
    consumed_lines = t.value.count('\n')
    t.lexer.lineno += consumed_lines
# Lexer rule that discards '//' single-line comments.
#
# NOTE: the docstring is the token regex PLY matches against; keep prose in
# comments. The pattern uses '.*' rather than '.+' so that an empty comment
# (a bare '//' at the end of a line) is also skipped instead of being
# reported as an illegal character.
#
# Returns None, which tells PLY to drop the matched text.
def t_comment(t):
    r'//.*'
    pass
# Lexer error handler: report the offending input and abort the program.
#
# t: the PLY token for the unmatched input; t.value holds the remaining text,
#    of which the first five characters are shown, and t.lexer.lineno is the
#    current line number.
# Exits the process with status 1 (raises SystemExit).
def t_error(t):
    # Parenthesised single-argument print works identically on Python 2 and 3;
    # the bare print statement is a SyntaxError on Python 3.
    print("Illegal characters '%s'... at line:%d" % (t.value[0:5], t.lexer.lineno))
    exit(1)
# Build the lexer from the tokens tuple and t_* rules defined in this module.
lexer = lex.lex()
# Module-level dict; in the visible code it is only wrapped into parser.parent.
records = {}
# PLY grammars are written in plain BNF; EBNF shorthand is not supported.
# Parsing is bottom-up: every rule function MUST store its result in p[0].
# The first rule defined below is the implicit start rule.
# Grammar action for a single `NAME = value` record.
#
# NOTE: the docstring is the grammar production PLY reads; keep prose in
# comments. p[3] is the (is_recordlist, payload) pair produced by the `value`
# rule. A record-list payload (a list of (name, value) pairs) is converted to
# a dict; any other payload is passed through unchanged.
#
# Result stored in p[0]: a (name, value) tuple.
def p_expression_record(p):
    '''record : NAME EQUAL value'''
    is_recordlist, payload = p[3][0], p[3][1]
    p[0] = (p[1], dict(payload) if is_recordlist else payload)
# Grammar action for a literal: a STRING or NUM token.
#
# NOTE: the docstring is the grammar production PLY reads; keep prose in
# comments. The token's value is forwarded unchanged as the rule's result.
def p_expression_literal(p):
    '''literal : STRING
               | NUM'''
    token_value = p[1]
    p[0] = token_value
# Grammar action for a bracketed list, possibly empty.
#
# NOTE: the docstring is the grammar production PLY reads; keep prose in
# comments. With three symbols on the right-hand side (len(p) == 4) the inner
# `listmul` result is used; for `[]` a new empty list is produced.
def p_expression_listouter(p):
    '''listouter : LBRACK listmul RBRACK
                 | LBRACK RBRACK'''
    has_items = len(p) == 4
    p[0] = p[2] if has_items else []
# Grammar action for the comma-separated items inside a list.
#
# NOTE: the docstring is the grammar production PLY reads; keep prose in
# comments. The rule is left-recursive: the first literal starts a new list,
# and each following `, literal` is appended to that same list object.
def p_expression_listmul(p):
    '''listmul : listmul COMMA literal
               | literal'''
    if len(p) != 4:
        # Single literal: start the list.
        p[0] = [p[1]]
        return
    items = p[1]
    items.append(p[3])  # grow the existing list in place
    p[0] = items
# Grammar action for the right-hand side of a record.
#
# NOTE: the docstring is the grammar production PLY reads; keep prose in
# comments. The result is an (is_recordlist, payload) pair: True with the
# record list for a `{ ... }` block, False with the value for a literal or list.
def p_expression_value(p):
    '''value : LBRACE recordlist RBRACE
             | literal
             | listouter'''
    is_block = len(p) == 4
    payload = p[2] if is_block else p[1]
    p[0] = (is_block, payload)
# Grammar action for one or more consecutive records.
#
# NOTE: the docstring is the grammar production PLY reads; keep prose in
# comments. The rule is left-recursive: the first record starts a new list,
# and each further record is appended to that same list object.
def p_expression_recordlist(p):
    '''recordlist : recordlist record
                  | record'''
    if len(p) == 3:
        collected, latest = p[1], p[2]
        collected.append(latest)
    else:
        collected = [p[1]]
    p[0] = collected
# Parser error handler: print a diagnostic for a syntax error.
#
# p: the offending token when one is available (it is printed as-is), or a
#    falsy value, in which case the error is reported as occurring at EOF.
# Returns None; no error recovery is attempted here.
def p_error(p):
    # Parenthesised single-argument print works identically on Python 2 and 3;
    # the bare print statement is a SyntaxError on Python 3.
    if p:
        print('Syntax error')
        print(p)
    else:
        print('Syntax error at EOF')
# Build the LR parser from the p_* grammar functions defined in this module.
parser = yacc.yacc()
# Attach the module-level `records` dict, wrapped in a one-element list, as a
# custom `parent` attribute on the parser. Nothing in the visible code reads
# it back. (Stray trailing semicolon removed.)
parser.parent = [records]
if __name__ == '__main__':
    # Demo: parse a sample document with nested record blocks, lists,
    # string/number literals and '//' comments, then pretty-print the result.
    sample = '''
NAME = {
KEY1 = "Value"
KEY2 = ["These are my twisted words", 253, 20.0]
NEST_NAME = {
// single line comment
KEY3 = "Value"
KEY4 = []
NEST_NAME2 = {
// single line comment
KEY9 = "Value"
KEY0 = []
}
NEST_NAME3 = {
// single line comment
KEYa = "Value"
KEYb = []
}
}
KEY5 = [253, "Daily Days"]
NEST_NAME0 = {
// single line comment
KEYa = "Value"
KEYb = []
}
KEY8 = ["Fuck", 253, 20.0]
}
'''
    parsed = parser.parse(sample)
    pprint(parsed)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment