Skip to content

Instantly share code, notes, and snippets.

@K-atc
Created July 8, 2015 16:52
Show Gist options
  • Save K-atc/02b402e9812e066afb78 to your computer and use it in GitHub Desktop.
Save K-atc/02b402e9812e066afb78 to your computer and use it in GitHub Desktop.
Whitespace Parser

Whitespace Parser

  • -p オプション必須
  • プログラムは暫定的
#!/usr/bin/python
# -*- coding: utf-8 -*-
import optparse
import sys
opt_run = True
opt_parse = False
opt_verbose = False
SPACE = ord(' ') # 32
TAB = ord('\t') # 9
LF = ord('\n') # 10
PARAM_NONE = 0
PARAM_NUM = 1
PARAM_LABEL = 2
# runtime error
class InterpreterException(Exception):
def __init__(cls, ip, message):
msg = "Error in ip=%d --> %s" % ((ip, message))
Exception.__init__(cls, msg)
# IMP
IMPS = [[SPACE], [TAB, SPACE], [TAB, TAB], [LF], [TAB, LF]]
# whitespace commands (see http://compsoc.dur.ac.uk/whitespace/tutorial.php)
instructions = {
# Stack Manipulation (IMP: [Space])
'PUSH': ((SPACE, SPACE), PARAM_NUM, 'Push the number onto the stack'),
'SDUPLI': ((SPACE, LF, SPACE), PARAM_NONE, 'Duplicate the top item on the stack'),
'SCOPY': ((SPACE, TAB, SPACE), PARAM_NUM, 'Copy the nth item on the stack onto the top of the stack'),
'SSWAP': ((SPACE, LF, TAB), PARAM_NONE, 'Swap the top two items on the stack'),
'SDISCARD': ((SPACE, LF, LF), PARAM_NONE, 'Discard the top item on the stack'),
'SSLIDE': ((SPACE, TAB, LF), PARAM_NUM, 'Slide n items off the stack, keeping the top item'),
# Arithmetic (IMP: [Tab][Space])
'ADD': ((TAB, SPACE, SPACE, SPACE), PARAM_NONE, 'Addition'),
'SUB': ((TAB, SPACE, SPACE, TAB), PARAM_NONE, 'Subtraction'),
'MUL': ((TAB, SPACE, SPACE, LF), PARAM_NONE, 'Multiplication'),
'DIV': ((TAB, SPACE, TAB, SPACE), PARAM_NONE, 'Integer Division'),
'MOD': ((TAB, SPACE, TAB, TAB), PARAM_NONE, 'Modulo'),
# Heap Access (IMP: [Tab][Tab])
'STORE': ((TAB, TAB, SPACE), PARAM_NONE, 'Store'),
'RETRIEVE': ((TAB, TAB, TAB), PARAM_NONE, 'Retrieve'),
# Flow Control (IMP: [LF])
'LABEL': ((LF, SPACE, SPACE), PARAM_LABEL, 'Mark a location in the program'),
'CALL': ((LF, SPACE, TAB), PARAM_LABEL, 'Call a subroutine'),
'JUMP': ((LF, SPACE, LF), PARAM_LABEL, 'Jump unconditionally to a label'),
'JUMP-ZERO': ((LF, TAB, SPACE), PARAM_LABEL, 'Jump to a label if the top of the stack is zero'),
'JUMP-NEG': ((LF, TAB, TAB), PARAM_LABEL, 'Jump to a label if the top of the stack is negative'),
'RETURN': ((LF, TAB, LF), PARAM_NONE, 'End of subroutine'),
'END': ((LF, LF, LF), 0, 'End the program'),
# I/O (IMP: [Tab][LF])
'OUT-CHAR': ((TAB, LF, SPACE, SPACE), PARAM_NONE, 'Output the character at the top of the stack'),
'OUT-NUM': ((TAB, LF, SPACE, TAB), PARAM_NONE, 'Output the number at the top of the stack'),
'IN-CHAR': ((TAB, LF, TAB, SPACE), PARAM_NONE, 'Read a character and place it in the location given by the top of the stack'),
'IN-NUM': ((TAB, LF, TAB, TAB), PARAM_NONE, 'Read a number and place it in the location given by the top of the stack')
}
class Compiler:
@staticmethod
def is_same_vactor(v1, v2):
if(len(v1) != len(v2)):
return False
for i in range(len(v1)):
if v1[i] != v2[i]:
return False
return True
@classmethod
def imp_compatible(cls, imp):
for test in IMPS:
if cls.is_same_vactor(imp, test):
return True
return False
@classmethod
def get_command_name(cls, imp, cmd):
tokens = imp + cmd
for cmd_name in instructions.keys():
if cls.is_same_vactor(instructions[cmd_name][0], tokens):
return cmd_name
return ''
@staticmethod
def need_parameter(command_name):
if command_name == '':
return False
info = instructions[command_name]
return bool(info[1] > PARAM_NONE)
@staticmethod
def decode(program_text):
d = []
for ichr in program_text:
if ichr == ' ':
d.append(SPACE)
elif ichr == '\t':
d.append(TAB)
elif ichr == '\n':
d.append(LF)
return d
@staticmethod
def format_parameter(cmd_name, parameter):
param_type = instructions[cmd_name][1]
if param_type == PARAM_NUM:
ret = 0
for i in range(1,len(parameter)):
if parameter[i] == TAB:
ret += 1
ret <<= 1
ret >>= 1
if parameter[0] == TAB:
ret *= -1
return str(ret)
elif param_type == PARAM_LABEL:
ret = 0
for x in parameter:
if x == TAB:
ret += 1
ret <<= 1
ret >>= 1
return str(ret)
elif param_type == PARAM_NONE:
return ''
@classmethod
def intermediate_representation(cls, decoded):
intermediate = []
imp = []
command = []
parameter = []
imp_fetched = False
command_fetched = False
parameter_fetch = True
fetch = False
command_name = ''
parameter_value = None
care_x = False # fetch完了後にxをケアする必要があるかフラグ
for x in decoded:
# print "----"
# print "x = %d" % x
# print imp
# print command
# print parameter
if imp_fetched == False: # requred
if len(imp) > 0 and cls.imp_compatible(imp):
imp_fetched = True
# this x is a part of the command
# goto command fetch
else:
imp.append(x)
continue
if command_fetched == False: # requied
command_name = cls.get_command_name(imp, command)
if len(command) > 10:
break
# print "command_name = %s" % command_name
if len(command) > 0 and command_name != '':
command_fetched = True
if cls.need_parameter(command_name):
parameter_fetch = True
# this x is a part of the command
# goto parameter fetch
else:
# Note: ここの時点で x は宙ぶらりん
care_x = True
parameter_fetch = False
fetch = True
# goto fetch
else:
command.append(x)
continue
if parameter_fetch:
if len(parameter) > 0 and x == LF: # ここで x 消費
parameter_value = cls.format_parameter(command_name, parameter)
parameter_fetch = False
fetch = True
else:
parameter.append(x)
continue
if fetch:
# print "fetch done"
# print "command_name = %s" % command_name
# print "parameter_value = %s" % parameter_value
if parameter_value != None:
intermediate.append([command_name, parameter_value])
else:
intermediate.append([command_name])
imp = []
command = []
parameter = []
imp_fetched = False
command_fetched = False
parameter_fetch = True
fetch = False
parameter_value = None
if care_x: # 宙ぶらりんな x を入れてあげる
imp = [x]
care_x = False
# for last char(this should be last of END sequence(LF))
# print "--- last ---"
# print imp
# print command
command_name = cls.get_command_name(imp, command)
intermediate.append([command_name])
return intermediate
class Print:
@staticmethod
def decode(codes):
print codes
@staticmethod
def intermediate(codes):
# print "@Print.intermediate: %s" % codes
for code in codes:
intext = ""
if len(code[0]) < 8:
intext = "\t\t".join(code)
else:
intext = "\t".join(code)
intext += "\t"
if len(code) < 2:
if len(code[0]) < 8:
intext += "\t\t"
else:
intext += "\t"
intext += "\t;%s" % instructions[code[0]][2]
print "%s" % intext
if __name__ == '__main__':
parser = optparse.OptionParser()
parser.add_option("-v", "--verbose", action="store_true", default=False, help="Activate verbose mode")
# parser.add_option("-s", "--stack", action="store_true", default=False, help="Show the stack after each intruction execution")
parser.add_option("-p", "--parse", action="store_true", default=False, help="Pause the execution after each instruction")
(opts, args) = parser.parse_args()
if len(args) != 1:
print "Please specify the filename of the program"
parser.print_help()
print
print "Whitespace interpreter by K_atc"
print "http://katc.sakura.ne.jp/"
sys.exit(-1)
# Read options
opt_verbose = opts.verbose
opt_parse = opts.parse
if opt_parse:
opt_run = False
f = open(args[0])
program_text = f.read()
f.close()
# print "same_vector = %s" % Compiler.is_same_vactor([1,2,3], [2,2,2])
# print "same_vector = %s" % Compiler.is_same_vactor([1,2,3], [1,2,3])
decoded = Compiler.decode(program_text)
# Print.decode(decoded)
intermediate = Compiler.intermediate_representation(decoded)
if opt_run:
print
# VM.run(intermediate)
else:
if opt_parse:
Print.intermediate(intermediate)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment