- -p オプション必須
- プログラムは暫定的
Created
July 8, 2015 16:52
-
-
Save K-atc/02b402e9812e066afb78 to your computer and use it in GitHub Desktop.
Whitespace Parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
import optparse | |
import sys | |
opt_run = True | |
opt_parse = False | |
opt_verbose = False | |
SPACE = ord(' ') # 32 | |
TAB = ord('\t') # 9 | |
LF = ord('\n') # 10 | |
PARAM_NONE = 0 | |
PARAM_NUM = 1 | |
PARAM_LABEL = 2 | |
# runtime error | |
class InterpreterException(Exception): | |
def __init__(cls, ip, message): | |
msg = "Error in ip=%d --> %s" % ((ip, message)) | |
Exception.__init__(cls, msg) | |
# IMP | |
IMPS = [[SPACE], [TAB, SPACE], [TAB, TAB], [LF], [TAB, LF]] | |
# whitespace commands (see http://compsoc.dur.ac.uk/whitespace/tutorial.php) | |
instructions = { | |
# Stack Manipulation (IMP: [Space]) | |
'PUSH': ((SPACE, SPACE), PARAM_NUM, 'Push the number onto the stack'), | |
'SDUPLI': ((SPACE, LF, SPACE), PARAM_NONE, 'Duplicate the top item on the stack'), | |
'SCOPY': ((SPACE, TAB, SPACE), PARAM_NUM, 'Copy the nth item on the stack onto the top of the stack'), | |
'SSWAP': ((SPACE, LF, TAB), PARAM_NONE, 'Swap the top two items on the stack'), | |
'SDISCARD': ((SPACE, LF, LF), PARAM_NONE, 'Discard the top item on the stack'), | |
'SSLIDE': ((SPACE, TAB, LF), PARAM_NUM, 'Slide n items off the stack, keeping the top item'), | |
# Arithmetic (IMP: [Tab][Space]) | |
'ADD': ((TAB, SPACE, SPACE, SPACE), PARAM_NONE, 'Addition'), | |
'SUB': ((TAB, SPACE, SPACE, TAB), PARAM_NONE, 'Subtraction'), | |
'MUL': ((TAB, SPACE, SPACE, LF), PARAM_NONE, 'Multiplication'), | |
'DIV': ((TAB, SPACE, TAB, SPACE), PARAM_NONE, 'Integer Division'), | |
'MOD': ((TAB, SPACE, TAB, TAB), PARAM_NONE, 'Modulo'), | |
# Heap Access (IMP: [Tab][Tab]) | |
'STORE': ((TAB, TAB, SPACE), PARAM_NONE, 'Store'), | |
'RETRIEVE': ((TAB, TAB, TAB), PARAM_NONE, 'Retrieve'), | |
# Flow Control (IMP: [LF]) | |
'LABEL': ((LF, SPACE, SPACE), PARAM_LABEL, 'Mark a location in the program'), | |
'CALL': ((LF, SPACE, TAB), PARAM_LABEL, 'Call a subroutine'), | |
'JUMP': ((LF, SPACE, LF), PARAM_LABEL, 'Jump unconditionally to a label'), | |
'JUMP-ZERO': ((LF, TAB, SPACE), PARAM_LABEL, 'Jump to a label if the top of the stack is zero'), | |
'JUMP-NEG': ((LF, TAB, TAB), PARAM_LABEL, 'Jump to a label if the top of the stack is negative'), | |
'RETURN': ((LF, TAB, LF), PARAM_NONE, 'End of subroutine'), | |
'END': ((LF, LF, LF), 0, 'End the program'), | |
# I/O (IMP: [Tab][LF]) | |
'OUT-CHAR': ((TAB, LF, SPACE, SPACE), PARAM_NONE, 'Output the character at the top of the stack'), | |
'OUT-NUM': ((TAB, LF, SPACE, TAB), PARAM_NONE, 'Output the number at the top of the stack'), | |
'IN-CHAR': ((TAB, LF, TAB, SPACE), PARAM_NONE, 'Read a character and place it in the location given by the top of the stack'), | |
'IN-NUM': ((TAB, LF, TAB, TAB), PARAM_NONE, 'Read a number and place it in the location given by the top of the stack') | |
} | |
class Compiler: | |
@staticmethod | |
def is_same_vactor(v1, v2): | |
if(len(v1) != len(v2)): | |
return False | |
for i in range(len(v1)): | |
if v1[i] != v2[i]: | |
return False | |
return True | |
@classmethod | |
def imp_compatible(cls, imp): | |
for test in IMPS: | |
if cls.is_same_vactor(imp, test): | |
return True | |
return False | |
@classmethod | |
def get_command_name(cls, imp, cmd): | |
tokens = imp + cmd | |
for cmd_name in instructions.keys(): | |
if cls.is_same_vactor(instructions[cmd_name][0], tokens): | |
return cmd_name | |
return '' | |
@staticmethod | |
def need_parameter(command_name): | |
if command_name == '': | |
return False | |
info = instructions[command_name] | |
return bool(info[1] > PARAM_NONE) | |
@staticmethod | |
def decode(program_text): | |
d = [] | |
for ichr in program_text: | |
if ichr == ' ': | |
d.append(SPACE) | |
elif ichr == '\t': | |
d.append(TAB) | |
elif ichr == '\n': | |
d.append(LF) | |
return d | |
@staticmethod | |
def format_parameter(cmd_name, parameter): | |
param_type = instructions[cmd_name][1] | |
if param_type == PARAM_NUM: | |
ret = 0 | |
for i in range(1,len(parameter)): | |
if parameter[i] == TAB: | |
ret += 1 | |
ret <<= 1 | |
ret >>= 1 | |
if parameter[0] == TAB: | |
ret *= -1 | |
return str(ret) | |
elif param_type == PARAM_LABEL: | |
ret = 0 | |
for x in parameter: | |
if x == TAB: | |
ret += 1 | |
ret <<= 1 | |
ret >>= 1 | |
return str(ret) | |
elif param_type == PARAM_NONE: | |
return '' | |
@classmethod | |
def intermediate_representation(cls, decoded): | |
intermediate = [] | |
imp = [] | |
command = [] | |
parameter = [] | |
imp_fetched = False | |
command_fetched = False | |
parameter_fetch = True | |
fetch = False | |
command_name = '' | |
parameter_value = None | |
care_x = False # fetch完了後にxをケアする必要があるかフラグ | |
for x in decoded: | |
# print "----" | |
# print "x = %d" % x | |
# print imp | |
# print command | |
# print parameter | |
if imp_fetched == False: # requred | |
if len(imp) > 0 and cls.imp_compatible(imp): | |
imp_fetched = True | |
# this x is a part of the command | |
# goto command fetch | |
else: | |
imp.append(x) | |
continue | |
if command_fetched == False: # requied | |
command_name = cls.get_command_name(imp, command) | |
if len(command) > 10: | |
break | |
# print "command_name = %s" % command_name | |
if len(command) > 0 and command_name != '': | |
command_fetched = True | |
if cls.need_parameter(command_name): | |
parameter_fetch = True | |
# this x is a part of the command | |
# goto parameter fetch | |
else: | |
# Note: ここの時点で x は宙ぶらりん | |
care_x = True | |
parameter_fetch = False | |
fetch = True | |
# goto fetch | |
else: | |
command.append(x) | |
continue | |
if parameter_fetch: | |
if len(parameter) > 0 and x == LF: # ここで x 消費 | |
parameter_value = cls.format_parameter(command_name, parameter) | |
parameter_fetch = False | |
fetch = True | |
else: | |
parameter.append(x) | |
continue | |
if fetch: | |
# print "fetch done" | |
# print "command_name = %s" % command_name | |
# print "parameter_value = %s" % parameter_value | |
if parameter_value != None: | |
intermediate.append([command_name, parameter_value]) | |
else: | |
intermediate.append([command_name]) | |
imp = [] | |
command = [] | |
parameter = [] | |
imp_fetched = False | |
command_fetched = False | |
parameter_fetch = True | |
fetch = False | |
parameter_value = None | |
if care_x: # 宙ぶらりんな x を入れてあげる | |
imp = [x] | |
care_x = False | |
# for last char(this should be last of END sequence(LF)) | |
# print "--- last ---" | |
# print imp | |
# print command | |
command_name = cls.get_command_name(imp, command) | |
intermediate.append([command_name]) | |
return intermediate | |
class Print: | |
@staticmethod | |
def decode(codes): | |
print codes | |
@staticmethod | |
def intermediate(codes): | |
# print "@Print.intermediate: %s" % codes | |
for code in codes: | |
intext = "" | |
if len(code[0]) < 8: | |
intext = "\t\t".join(code) | |
else: | |
intext = "\t".join(code) | |
intext += "\t" | |
if len(code) < 2: | |
if len(code[0]) < 8: | |
intext += "\t\t" | |
else: | |
intext += "\t" | |
intext += "\t;%s" % instructions[code[0]][2] | |
print "%s" % intext | |
if __name__ == '__main__': | |
parser = optparse.OptionParser() | |
parser.add_option("-v", "--verbose", action="store_true", default=False, help="Activate verbose mode") | |
# parser.add_option("-s", "--stack", action="store_true", default=False, help="Show the stack after each intruction execution") | |
parser.add_option("-p", "--parse", action="store_true", default=False, help="Pause the execution after each instruction") | |
(opts, args) = parser.parse_args() | |
if len(args) != 1: | |
print "Please specify the filename of the program" | |
parser.print_help() | |
print "Whitespace interpreter by K_atc" | |
print "http://katc.sakura.ne.jp/" | |
sys.exit(-1) | |
# Read options | |
opt_verbose = opts.verbose | |
opt_parse = opts.parse | |
if opt_parse: | |
opt_run = False | |
f = open(args[0]) | |
program_text = f.read() | |
f.close() | |
# print "same_vector = %s" % Compiler.is_same_vactor([1,2,3], [2,2,2]) | |
# print "same_vector = %s" % Compiler.is_same_vactor([1,2,3], [1,2,3]) | |
decoded = Compiler.decode(program_text) | |
# Print.decode(decoded) | |
intermediate = Compiler.intermediate_representation(decoded) | |
if opt_run: | |
# VM.run(intermediate) | |
else: | |
if opt_parse: | |
Print.intermediate(intermediate) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment