Skip to content

Instantly share code, notes, and snippets.

@moriyoshi
Created May 24, 2010 19:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save moriyoshi/412286 to your computer and use it in GitHub Desktop.
Save moriyoshi/412286 to your computer and use it in GitHub Desktop.
"""
Pascal / Delphi lexer
Based on the public-domain lex file by Scott A. Moore.
(http://www.moorecad.com/standardpascal/yacclex.html )
"""
import ply.lex
import re
# Names exported by the lexer module on `from ... import *`: the token-name
# tuple, the lexer factory and the exception type.
__all__ = [
'tokens',
'createlexer',
'InvalidTokenError',
]
class InvalidTokenError(Exception):
    """Raised by the lexer when the input cannot be tokenized.

    Derives from ``Exception`` (not ``BaseException``) so that ordinary
    ``except Exception`` handlers see it like any other application error.
    """
# Every token name the lexer can emit, in alphabetical order.  The parser
# module imports this tuple to learn the terminal symbols of the grammar.
tokens = (
    'AND', 'ARRAY', 'AS', 'ASSIGNMENT',
    'CASE', 'CHARACTER_STRING', 'CHARCODE', 'COLON', 'COMMA', 'CONST',
    'DIV', 'DO', 'DOT', 'DOTDOT', 'DOWNTO',
    'ELSE', 'END', 'EQUAL', 'EXCEPT', 'EXTERNAL',
    'FINALLY', 'FOR', 'FORWARD', 'FUNCTION',
    'GE', 'GOTO', 'GT',
    'HEXADECIMALNUMBER',
    'IDENTIFIER', 'IF', 'IN', 'IS',
    'LABEL', 'LBRAC', 'LE', 'LPAREN', 'LT',
    'MINUS', 'MOD',
    'NIL', 'NOT', 'NOTEQUAL',
    'OF', 'OR', 'OTHERWISE',
    'PACKED', 'PBEGIN', 'PFILE', 'PLUS', 'PROCEDURE', 'PROGRAM',
    'RBRAC', 'REALNUMBER', 'RECORD', 'REPEAT', 'RPAREN',
    'SEMICOLON', 'SET', 'SHL', 'SHR', 'SLASH', 'STAR', 'STARSTAR',
    'THEN', 'TO', 'TRY', 'TYPE',
    'UNTIL', 'UPARROW', 'USES',
    'VAR',
    'WHILE', 'WITH',
    'XOR',
)
# Reserved words, keyed by their lower-case spelling, mapped to the token
# type that t_IDENTIFIER assigns when an identifier matches one of them.
# 'begin' and 'file' map to PBEGIN / PFILE rather than BEGIN / FILE.
keywords = {
    'and': 'AND',
    'array': 'ARRAY',
    'as': 'AS',
    'begin': 'PBEGIN',
    'case': 'CASE',
    'const': 'CONST',
    'div': 'DIV',
    'do': 'DO',
    'downto': 'DOWNTO',
    'else': 'ELSE',
    'end': 'END',
    'except': 'EXCEPT',
    'external': 'EXTERNAL',
    'file': 'PFILE',
    'finally': 'FINALLY',
    'for': 'FOR',
    'forward': 'FORWARD',
    'function': 'FUNCTION',
    'goto': 'GOTO',
    'if': 'IF',
    'in': 'IN',
    'is': 'IS',
    # LABEL is declared in `tokens` and used by the grammar's
    # label_declaration_part; without this entry it could never be produced.
    'label': 'LABEL',
    'mod': 'MOD',
    'nil': 'NIL',
    'not': 'NOT',
    'of': 'OF',
    'or': 'OR',
    'otherwise': 'OTHERWISE',
    'packed': 'PACKED',
    'procedure': 'PROCEDURE',
    'program': 'PROGRAM',
    'record': 'RECORD',
    'repeat': 'REPEAT',
    'set': 'SET',
    'shl': 'SHL',
    'shr': 'SHR',
    'then': 'THEN',
    'to': 'TO',
    'try': 'TRY',
    'type': 'TYPE',
    'until': 'UNTIL',
    'uses': 'USES',
    'var': 'VAR',
    'while': 'WHILE',
    'with': 'WITH',
    'xor': 'XOR',
}
# Tokens defined by plain regular-expression strings.  Several patterns share
# a prefix (':=' / ':', '>=' / '>', '<=' / '<>' / '<', '**' / '*'); per the
# ply.lex documentation string rules are tried longest-regex-first, so the
# order of the assignments below is not what disambiguates them.
t_ASSIGNMENT = r':='
t_COLON = r':'
t_COMMA = r','
# The negative lookahead keeps a single '.' from matching the first dot of '..'.
t_DOT = r'\.(?!\.)'
t_DOTDOT = r'\.\.'
t_EQUAL = r'='
t_GE = r'>='
t_GT = r'>'
t_LBRAC = r'\['
t_LE = r'<='
t_LPAREN = r'\('
t_LT = r'<'
t_MINUS = r'-'
t_NOTEQUAL = r'<>'
t_PLUS = r'\+'
t_RBRAC = r'\]'
# Fraction and exponent are both optional, so plain integers are REALNUMBER
# tokens too.  A fraction needs a digit after the dot, which leaves '1..10'
# to be split as REALNUMBER DOTDOT REALNUMBER.
t_REALNUMBER = r'[0-9]+(\.[0-9]+)?(?:[eE][+-]?[0-9]+)?'
t_HEXADECIMALNUMBER = r'\$[0-9a-fA-F]+'
t_RPAREN = r'\)'
t_SEMICOLON = r';'
t_SLASH = r'/'
t_STAR = r'\*'
t_STARSTAR = r'\*\*'
t_UPARROW = r'\^'
# '#' followed by a decimal or '$'-prefixed hexadecimal character code.
# NOTE(review): the '#' is escaped, presumably because ply compiles rules in
# verbose mode where a bare '#' starts a comment -- confirm against ply.lex.
t_CHARCODE = r'\#(?:[0-9]+|\$[0-9a-fA-F]+)'
# Rule names starting with t_ignore_ are matched and then discarded by ply.lex.
t_ignore_WHITESPACES = r'[ \t\f]'
# Identifier or reserved word.  The docstring is the token's regular
# expression (read by ply.lex), so the documentation lives in this comment.
# The matched text is looked up case-insensitively in `keywords`; on a hit the
# token type becomes that keyword's type, otherwise it stays IDENTIFIER.
# t.value keeps the original spelling.
def t_IDENTIFIER(t):
    r'[a-zA-Z_][a-zA-Z0-9_]*'
    t.type = keywords.get(t.value.lower(), 'IDENTIFIER')
    return t
# Single-quoted string literal.  The docstring is the token's regular
# expression (read by ply.lex).  Inside the literal a doubled quote ('')
# stands for one quote character; the token value is the literal with the
# surrounding quotes removed and doubled quotes collapsed.
def t_CHARACTER_STRING(t):
    r"'(?:[^']|'')*'"
    t.value = t.value[1:-1].replace("''", "'")
    return t
def t_error(t):
    """Abort lexing when no token rule matches the input.

    Raises:
        InvalidTokenError: always; its argument is ``t.value``, which ply.lex
            documents as the remaining, not yet tokenized input.
    """
    # The original raised the undefined name `InvalidToken`, which surfaced as
    # a NameError instead of the exception this module exports.
    raise InvalidTokenError(t.value)
# Comment in one of three forms: (* ... *), { ... } or // to end of line.
# The docstring is the token's regular expression (read by ply.lex), so the
# documentation lives here.  Nothing is returned, so the comment is dropped;
# only the lexer's line bookkeeping is updated:
#   * lineno grows by the number of line breaks inside the comment;
#   * last_newline_pos moves to the last line break inside the comment, and is
#     left alone when the comment contains none (the original moved it to the
#     comment start, which skews the column reported for later tokens).
# The trailing line break of a // comment is optional so that such a comment
# on the last line of a file without a final newline is still recognised.
def t_comment(t):
    r'\(\*(.|\r|\n)*?\*\)|{[^}]*}|//[^\r\n]*(?:\r\n|\r|\n)?'
    # Same line-break pattern as the t_newline rule; counted without
    # `reduce`, which is not a builtin on Python 3.
    line_breaks = list(re.finditer(r'\r\n|\r|\n', t.value))
    if line_breaks:
        t.lexer.lineno += len(line_breaks)
        t.lexer.last_newline_pos = t.lexpos + line_breaks[-1].start()
# Line break (\r\n, \r or \n).  The docstring is the token's regular
# expression (read by ply.lex).  No token is returned; the rule only bumps the
# lexer's line counter and records where the line break started, which
# p_error later uses to derive a column.
def t_newline(t):
    r'\r\n|\r|\n'
    t.lexer.lineno += 1
    t.lexer.last_newline_pos = t.lexpos
def createlexer(*arg, **kwarg):
    """Build a ply lexer and attach the extra state this module relies on.

    All positional and keyword arguments are forwarded unchanged to
    ``ply.lex.lex``.

    Returns:
        The lexer object, with two additional attributes:
        ``last_newline_pos`` (initialised to 0, updated by the newline and
        comment rules) and ``file`` (initialised to ``''``; callers may set it
        to the name of the file being tokenized).
    """
    lexer = ply.lex.lex(*arg, **kwarg)
    lexer.last_newline_pos = 0
    lexer.file = ''
    return lexer
"""
Pascal / Delphi parser
Based on the public-domain yacc rule file by Scott A. Moore.
(http://www.moorecad.com/standardpascal/yacclex.html )
"""
import ply.yacc
from pascallexer import tokens
# Names exported by the parser module on `from ... import *`.
__all__ = [
'createparser',
]
class SyntaxError(Exception):
    """Parse error carrying a message and an optional source position.

    NOTE: inside this module the class shadows the builtin ``SyntaxError``;
    the name is kept because callers refer to it.

    Args:
        message: human-readable description of the error.
        file: name of the file being parsed; empty or ``None`` if unknown.
        lineno: line number of the error; ``0`` if unknown.
        col: column of the error.

    The four values are stored, in that order, in ``args``.
    """

    def __init__(self, message='', file='', lineno=0, col=0):
        Exception.__init__(self, message, file, lineno, col)

    @property
    def message(self):
        return self.args[0]

    @property
    def file(self):
        return self.args[1]

    @property
    def lineno(self):
        return self.args[2]

    @property
    def col(self):
        return self.args[3]

    def __str__(self):
        # Append only the parts that are actually known.  The original tested
        # `file is not None`, so the default file '' produced "<msg> in ".
        text = self.message
        if self.file:
            text += " in %s" % self.file
        if self.lineno:
            text += " on line %d at column %d" % (self.lineno, self.col)
        return text
# --- Program structure ------------------------------------------------------
# Every p_* function carries one grammar rule in its docstring, which is what
# ply.yacc reads.  None of them has a body, so no value is built for any
# production: the parser only accepts or rejects its input.


def p_file(p):
    '''file : program
            | module
    '''


def p_program(p):
    '''program : program_heading SEMICOLON block DOT
    '''


def p_program_heading(p):
    '''program_heading : PROGRAM IDENTIFIER
            | PROGRAM IDENTIFIER LPAREN IDENTIFIER_list RPAREN
    '''


def p_IDENTIFIER_list(p):
    '''IDENTIFIER_list : IDENTIFIER_list COMMA IDENTIFIER
            | IDENTIFIER
    '''


def p_block(p):
    '''block : uses_declaration_part label_declaration_part constant_definition_part type_definition_part variable_declaration_part procedure_and_function_declaration_part statement_part
    '''


# A module is a block without uses/label parts and without a statement part.
def p_module(p):
    '''module : constant_definition_part type_definition_part variable_declaration_part procedure_and_function_declaration_part
    '''
# --- Uses and label declarations ---------------------------------------------
# Both parts are optional: a line holding only '|' is an empty alternative.


def p_uses_declaration_part(p):
    '''uses_declaration_part : USES module_list SEMICOLON
            |
    '''


def p_module_list(p):
    '''module_list : module_list COMMA IDENTIFIER
            | IDENTIFIER
    '''


def p_label_declaration_part(p):
    '''label_declaration_part : LABEL label_list SEMICOLON
            |
    '''


def p_label_list(p):
    '''label_list : label_list COMMA IDENTIFIER
            | IDENTIFIER
    '''
# --- Constant definitions and constant expressions ---------------------------
# The c* rules mirror the run-time expression rules (expression,
# simple_expression, term, factor, exponentiation, primary) but their primary
# is restricted to identifiers, parenthesised constant expressions, unsigned
# constants and NOT.


def p_constant_definition_part(p):
    '''constant_definition_part : CONST constant_list
            |
    '''


def p_constant_list(p):
    '''constant_list : constant_list constant_definition
            | constant_definition
    '''


def p_constant_definition(p):
    '''constant_definition : IDENTIFIER EQUAL cexpression SEMICOLON
    '''


def p_cexpression(p):
    '''cexpression : csimple_expression
            | csimple_expression EQUAL csimple_expression
            | csimple_expression NOTEQUAL csimple_expression
            | csimple_expression LT csimple_expression
            | csimple_expression GT csimple_expression
            | csimple_expression LE csimple_expression
            | csimple_expression GE csimple_expression
            | csimple_expression IN csimple_expression
    '''


def p_csimple_expression(p):
    '''csimple_expression : cterm
            | csimple_expression PLUS cterm
            | csimple_expression MINUS cterm
            | csimple_expression OR cterm
            | csimple_expression XOR cterm
    '''


def p_cterm(p):
    '''cterm : cfactor
            | cterm STAR cfactor
            | cterm SLASH cfactor
            | cterm DIV cfactor
            | cterm MOD cfactor
            | cterm AND cfactor
            | cterm SHL cfactor
            | cterm SHR cfactor
    '''


def p_cfactor(p):
    '''cfactor : sign cfactor
            | cexponentiation
    '''


# Right-recursive, so '**' groups to the right.
def p_cexponentiation(p):
    '''cexponentiation : cprimary
            | cprimary STARSTAR cexponentiation
    '''


def p_cprimary(p):
    '''cprimary : IDENTIFIER
            | LPAREN cexpression RPAREN
            | unsigned_constant
            | NOT cprimary
    '''
# --- Literals ----------------------------------------------------------------


def p_constant(p):
    '''constant : non_string
            | sign non_string
            | string_literal
    '''


# A string literal is one or more adjacent quoted strings and '#' char codes.
def p_string_literal(p):
    '''string_literal : string_literal string_literal_component
            | string_literal_component
    '''


def p_string_literal_component(p):
    '''string_literal_component : CHARACTER_STRING
            | CHARCODE'''


def p_sign(p):
    '''sign : PLUS
            | MINUS
    '''


def p_non_string(p):
    '''non_string : IDENTIFIER
            | number
    '''
# --- Type definitions: ordinal, structured and array types -------------------


def p_type_definition_part(p):
    '''type_definition_part : TYPE type_definition_list
            |
    '''


def p_type_definition_list(p):
    '''type_definition_list : type_definition_list type_definition
            | type_definition
    '''


def p_type_definition(p):
    '''type_definition : IDENTIFIER EQUAL type_denoter SEMICOLON
    '''


def p_type_denoter(p):
    '''type_denoter : IDENTIFIER
            | new_type
    '''


def p_new_type(p):
    '''new_type : new_ordinal_type
            | new_structured_type
            | new_pointer_type
    '''


def p_new_ordinal_type(p):
    '''new_ordinal_type : enumerated_type
            | subrange_type
    '''


def p_enumerated_type(p):
    '''enumerated_type : LPAREN IDENTIFIER_list RPAREN
    '''


def p_subrange_type(p):
    '''subrange_type : constant DOTDOT constant
    '''


def p_new_structured_type(p):
    '''new_structured_type : structured_type
            | PACKED structured_type
    '''


def p_structured_type(p):
    '''structured_type : array_type
            | record_type
            | set_type
            | file_type
    '''


# The bracketed index list is optional, so 'ARRAY OF t' is accepted as well.
def p_array_type(p):
    '''array_type : ARRAY optional_array_dims OF component_type
    '''


def p_optional_array_dims(p):
    '''optional_array_dims : LBRAC index_list RBRAC
            |
    '''


def p_index_list(p):
    '''index_list : index_list COMMA index_type
            | index_type
    '''


def p_index_type(p):
    '''index_type : ordinal_type
    '''


def p_ordinal_type(p):
    '''ordinal_type : new_ordinal_type
            | IDENTIFIER
    '''


def p_component_type(p):
    '''component_type : type_denoter
    '''
# --- Record types and variant parts ------------------------------------------


def p_record_type(p):
    '''record_type : RECORD record_section_list END
            | RECORD record_section_list SEMICOLON variant_part END
            | RECORD variant_part END
    '''


def p_record_section_list(p):
    '''record_section_list : record_section_list SEMICOLON record_section
            | record_section
    '''


def p_record_section(p):
    '''record_section : IDENTIFIER_list COLON type_denoter
    '''


# The last alternative is empty, so a variant part may be absent altogether.
def p_variant_part(p):
    '''variant_part : CASE variant_selector OF variant_list SEMICOLON
            | CASE variant_selector OF variant_list
            |
    '''


def p_variant_selector(p):
    '''variant_selector : tag_field COLON tag_type
            | tag_type
    '''


def p_variant_list(p):
    '''variant_list : variant_list SEMICOLON variant
            | variant
    '''


def p_variant(p):
    '''variant : case_constant_list COLON LPAREN record_section_list RPAREN
            | case_constant_list COLON LPAREN record_section_list SEMICOLON variant_part RPAREN
            | case_constant_list COLON LPAREN variant_part RPAREN
    '''


def p_case_constant_list(p):
    '''case_constant_list : case_constant_list COMMA case_constant
            | case_constant
    '''


def p_case_constant(p):
    '''case_constant : constant
            | constant DOTDOT constant
    '''


def p_tag_field(p):
    '''tag_field : IDENTIFIER
    '''


def p_tag_type(p):
    '''tag_type : IDENTIFIER
    '''
# --- Set, file and pointer types ---------------------------------------------


def p_set_type(p):
    '''set_type : SET OF base_type
    '''


def p_base_type(p):
    '''base_type : ordinal_type
    '''


def p_file_type(p):
    '''file_type : PFILE OF component_type
    '''


def p_new_pointer_type(p):
    '''new_pointer_type : UPARROW domain_type
    '''


def p_domain_type(p):
    '''domain_type : IDENTIFIER
    '''
# --- Variable declarations ---------------------------------------------------


def p_variable_declaration_part(p):
    '''variable_declaration_part : VAR variable_declaration_list SEMICOLON
            |
    '''


def p_variable_declaration_list(p):
    '''variable_declaration_list : variable_declaration_list SEMICOLON variable_declaration
            | variable_declaration
    '''


def p_variable_declaration(p):
    '''variable_declaration : IDENTIFIER_list COLON type_denoter
    '''
# --- Procedure and function declarations -------------------------------------


def p_procedure_and_function_declaration_part(p):
    '''procedure_and_function_declaration_part : proc_or_func_declaration_list SEMICOLON
            |
    '''


def p_proc_or_func_declaration_list(p):
    '''proc_or_func_declaration_list : proc_or_func_declaration_list SEMICOLON proc_or_func_declaration
            | proc_or_func_declaration
    '''


def p_proc_or_func_declaration(p):
    '''proc_or_func_declaration : procedure_declaration
            | function_declaration
    '''


def p_procedure_declaration(p):
    '''procedure_declaration : procedure_heading SEMICOLON directive
            | procedure_heading SEMICOLON procedure_block
    '''


def p_procedure_heading(p):
    '''procedure_heading : procedure_identification
            | procedure_identification formal_parameter_list
    '''


def p_directive(p):
    '''directive : FORWARD
            | EXTERNAL
    '''


def p_formal_parameter_list(p):
    '''formal_parameter_list : LPAREN formal_parameter_section_list RPAREN
    '''


def p_formal_parameter_section_list(p):
    '''formal_parameter_section_list : formal_parameter_section_list SEMICOLON formal_parameter_section
            | formal_parameter_section
    '''


def p_formal_parameter_section(p):
    '''formal_parameter_section : value_parameter_specification
            | variable_parameter_specification
            | procedural_parameter_specification
            | functional_parameter_specification
    '''


def p_value_parameter_specification(p):
    '''value_parameter_specification : IDENTIFIER_list COLON IDENTIFIER
    '''


def p_variable_parameter_specification(p):
    '''variable_parameter_specification : VAR IDENTIFIER_list COLON IDENTIFIER
    '''


def p_procedural_parameter_specification(p):
    '''procedural_parameter_specification : procedure_heading
    '''


def p_functional_parameter_specification(p):
    '''functional_parameter_specification : function_heading
    '''


def p_procedure_identification(p):
    '''procedure_identification : PROCEDURE IDENTIFIER
    '''


def p_procedure_block(p):
    '''procedure_block : block
    '''


# function_identification (a heading without parameters or result type) is
# only allowed together with a block, not with a directive.
def p_function_declaration(p):
    '''function_declaration : function_heading SEMICOLON directive
            | function_identification SEMICOLON function_block
            | function_heading SEMICOLON function_block
    '''


def p_function_heading(p):
    '''function_heading : FUNCTION IDENTIFIER COLON result_type
            | FUNCTION IDENTIFIER formal_parameter_list COLON result_type
    '''


def p_result_type(p):
    '''result_type : IDENTIFIER
    '''


def p_function_identification(p):
    '''function_identification : FUNCTION IDENTIFIER
    '''


def p_function_block(p):
    '''function_block : block
    '''
# --- Statement skeleton ------------------------------------------------------
# Statements are split into "open" and "closed" families.
# NOTE(review): this looks like the usual device for making IF ... THEN ... ELSE
# unambiguous (see open_if_statement / closed_if_statement) -- confirm against
# the generated parser's conflict report.


def p_statement_part(p):
    '''statement_part : variable_declaration_part compound_statement
    '''


def p_compound_statement(p):
    '''compound_statement : PBEGIN statement_sequence END
    '''


def p_statement_sequence(p):
    '''statement_sequence : statement_sequence SEMICOLON statement
            | statement
    '''


def p_statement(p):
    '''statement : open_statement
            | closed_statement
    '''


# An optional 'IDENTIFIER COLON' prefix is a statement label.
def p_open_statement(p):
    '''open_statement : IDENTIFIER COLON non_labeled_open_statement
            | non_labeled_open_statement
    '''


def p_closed_statement(p):
    '''closed_statement : IDENTIFIER COLON non_labeled_closed_statement
            | non_labeled_closed_statement
    '''


# The trailing empty alternative is the empty statement.
def p_non_labeled_closed_statement(p):
    '''non_labeled_closed_statement : assignment_statement
            | procedure_statement
            | goto_statement
            | compound_statement
            | case_statement
            | repeat_statement
            | closed_with_statement
            | closed_if_statement
            | closed_while_statement
            | closed_for_statement
            |
    '''


def p_non_labeled_open_statement(p):
    '''non_labeled_open_statement : open_with_statement
            | open_if_statement
            | open_while_statement
            | open_for_statement
            | try_statement
    '''
# --- Structured statements: repeat, while, for, with, if, try ----------------
# while / for / with each come in an open and a closed form that differ only
# in the kind of statement allowed as their body.


def p_repeat_statement(p):
    '''repeat_statement : REPEAT statement_sequence UNTIL boolean_expression
    '''


def p_open_while_statement(p):
    '''open_while_statement : WHILE boolean_expression DO open_statement
    '''


def p_closed_while_statement(p):
    '''closed_while_statement : WHILE boolean_expression DO closed_statement
    '''


def p_open_for_statement(p):
    '''open_for_statement : FOR control_variable ASSIGNMENT initial_value direction final_value DO open_statement
    '''


def p_closed_for_statement(p):
    '''closed_for_statement : FOR control_variable ASSIGNMENT initial_value direction final_value DO closed_statement
    '''


def p_open_with_statement(p):
    '''open_with_statement : WITH record_variable_list DO open_statement
    '''


def p_closed_with_statement(p):
    '''closed_with_statement : WITH record_variable_list DO closed_statement
    '''


def p_open_if_statement(p):
    '''open_if_statement : IF boolean_expression THEN statement
            | IF boolean_expression THEN closed_statement ELSE open_statement
    '''


def p_closed_if_statement(p):
    '''closed_if_statement : IF boolean_expression THEN closed_statement ELSE closed_statement
    '''


def p_try_statement(p):
    '''try_statement : TRY statement_sequence EXCEPT statement_sequence END
            | TRY statement_sequence FINALLY statement_sequence END
    '''
# --- Assignment, variable access and procedure calls -------------------------


def p_assignment_statement(p):
    '''assignment_statement : variable_access ASSIGNMENT expression
    '''


def p_variable_access(p):
    '''variable_access : IDENTIFIER
            | IDENTIFIER params
            | field_designator
            | field_designator params
            | indexed_variable
            | variable_access UPARROW
    '''


def p_indexed_variable(p):
    '''indexed_variable : element LBRAC index_expression_list RBRAC
    '''


def p_index_expression_list(p):
    '''index_expression_list : index_expression_list COMMA index_expression
            | index_expression
    '''


def p_index_expression(p):
    '''index_expression : expression
    '''


def p_field_designator(p):
    '''field_designator : element DOT IDENTIFIER
    '''


def p_procedure_statement(p):
    '''procedure_statement : IDENTIFIER
            | IDENTIFIER params
            | field_designator
            | field_designator params
    '''


def p_params(p):
    '''params : LPAREN actual_parameter_list RPAREN
    '''


def p_actual_parameter_list(p):
    '''actual_parameter_list : actual_parameter_list COMMA actual_parameter
            | actual_parameter
    '''


# Up to two ':'-separated expressions may follow the parameter value.
# NOTE(review): presumably field-width / precision arguments as used by
# write/writeln -- confirm.
def p_actual_parameter(p):
    '''actual_parameter : expression
            | expression COLON expression
            | expression COLON expression COLON expression
    '''
# --- Goto, case and the helper rules of for / with ---------------------------


def p_goto_statement(p):
    '''goto_statement : GOTO IDENTIFIER
    '''


# The four alternatives cover an optional trailing ';' and an optional
# otherwise/else branch (itself with an optional trailing ';').
def p_case_statement(p):
    '''case_statement : CASE case_index OF case_list_element_list END
            | CASE case_index OF case_list_element_list SEMICOLON END
            | CASE case_index OF case_list_element_list SEMICOLON otherwisepart statement END
            | CASE case_index OF case_list_element_list SEMICOLON otherwisepart statement SEMICOLON END
    '''


def p_case_index(p):
    '''case_index : expression
    '''


def p_case_list_element_list(p):
    '''case_list_element_list : case_list_element_list SEMICOLON case_list_element
            | case_list_element
    '''


def p_case_list_element(p):
    '''case_list_element : case_constant_list COLON statement
    '''


def p_otherwisepart(p):
    '''otherwisepart : OTHERWISE
            | OTHERWISE COLON
            | ELSE
    '''


def p_control_variable(p):
    '''control_variable : IDENTIFIER
    '''


def p_initial_value(p):
    '''initial_value : expression
    '''


def p_direction(p):
    '''direction : TO
            | DOWNTO
    '''


def p_final_value(p):
    '''final_value : expression
    '''


def p_record_variable_list(p):
    '''record_variable_list : record_variable_list COMMA variable_access
            | variable_access
    '''
# --- Expressions -------------------------------------------------------------
# Operator precedence is encoded in the rule layering, loosest first:
# expression (relational, IN, IS) -> simple_expression (+ - OR XOR) ->
# term (* / DIV MOD AND SHL SHR AS) -> factor (sign) -> exponentiation (**)
# -> primary (NOT) -> element.


def p_boolean_expression(p):
    '''boolean_expression : expression
    '''


# At most one relational operator per expression: both operands are
# simple_expression, so 'a < b < c' is not derivable without parentheses.
def p_expression(p):
    '''expression : simple_expression
            | simple_expression EQUAL simple_expression
            | simple_expression NOTEQUAL simple_expression
            | simple_expression LT simple_expression
            | simple_expression GT simple_expression
            | simple_expression LE simple_expression
            | simple_expression GE simple_expression
            | simple_expression IN simple_expression
            | simple_expression IS simple_expression
    '''


def p_simple_expression(p):
    '''simple_expression : term
            | simple_expression PLUS term
            | simple_expression MINUS term
            | simple_expression OR term
            | simple_expression XOR term
    '''


def p_term(p):
    '''term : factor
            | term STAR factor
            | term SLASH factor
            | term DIV factor
            | term MOD factor
            | term AND factor
            | term SHL factor
            | term SHR factor
            | term AS IDENTIFIER
    '''


def p_factor(p):
    '''factor : sign factor
            | exponentiation
    '''


# Right-recursive, so '**' groups to the right.
def p_exponentiation(p):
    '''exponentiation : primary
            | primary STARSTAR exponentiation
    '''


def p_primary(p):
    '''primary : element
            | NOT primary
    '''


def p_element(p):
    '''element : variable_access
            | unsigned_constant
            | set_constructor
            | LPAREN expression RPAREN
    '''


def p_unsigned_constant(p):
    '''unsigned_constant : number
            | string_literal
            | NIL
    '''


def p_set_constructor(p):
    '''set_constructor : LBRAC member_designator_list RBRAC
            | LBRAC RBRAC
    '''


def p_member_designator_list(p):
    '''member_designator_list : member_designator_list COMMA member_designator
            | member_designator
    '''


def p_member_designator(p):
    '''member_designator : member_designator DOTDOT expression
            | expression
    '''


def p_number(p):
    '''number : REALNUMBER
            | HEXADECIMALNUMBER'''
def p_error(p):
    """Turn a parser error into a SyntaxError exception.

    Args:
        p: the offending token, or ``None`` when the input ended while the
            parser still expected more tokens.

    Raises:
        SyntaxError: always.  For a token, the exception carries the lexer's
            ``file``, its current ``lineno`` and a column computed as the
            lexer position minus the position of the last line break.
    """
    if p is None:
        # No token means no lexer to query; the original dereferenced
        # `p.lexer` here and died with an AttributeError instead.
        raise SyntaxError("syntax error: unexpected end of input")
    lexer = p.lexer
    column = lexer.lexpos - lexer.last_newline_pos
    raise SyntaxError("syntax error near '%s'" % p.value, lexer.file, lexer.lineno, column)
def createparser(*arg, **kwarg):
    """Build and return the ply parser for the grammar in this module.

    All positional and keyword arguments are forwarded unchanged to
    ``ply.yacc.yacc``.
    """
    return ply.yacc.yacc(*arg, **kwarg)
from pascalparser import createparser
from pascallexer import createlexer
import sys

# Driver: parse the Pascal source file named by the first command-line
# argument.  Nothing is printed on success; lexer and parser errors propagate
# as exceptions.
parser = createparser()
lexer = createlexer()
# Record the file name so that syntax errors can report where they occurred.
lexer.file = sys.argv[1]
# Read through a context manager so the file handle is closed promptly.
with open(sys.argv[1]) as source_file:
    source_text = source_file.read()
parser.parse(source_text, lexer=lexer)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment