K-atc/Readme.md

## Readme.md

      
    Raw
  

              Readme.md
            
          
    Whitespace Parser


-p オプション必須
プログラムは暫定的


## whitespace.py
#!/usr/bin/python
# -*- coding: utf-8 -*-

import optparse
import sys

opt_run   = True
opt_parse = False
opt_verbose = False

SPACE = ord(' ')  # 32
TAB   = ord('\t') # 9
LF    = ord('\n') # 10

PARAM_NONE = 0
PARAM_NUM = 1
PARAM_LABEL = 2

# runtime error
class InterpreterException(Exception):
    def __init__(cls, ip, message):
        msg = "Error in ip=%d --> %s" % ((ip, message))
        Exception.__init__(cls, msg)

# IMP
IMPS = [[SPACE], [TAB, SPACE], [TAB, TAB], [LF], [TAB, LF]]

# whitespace commands (see http://compsoc.dur.ac.uk/whitespace/tutorial.php)
instructions = {
    # Stack Manipulation (IMP: [Space])
    'PUSH': ((SPACE, SPACE), PARAM_NUM, 'Push the number onto the stack'),
    'SDUPLI': ((SPACE, LF, SPACE), PARAM_NONE, 'Duplicate the top item on the stack'),
    'SCOPY': ((SPACE, TAB, SPACE), PARAM_NUM, 'Copy the nth item on the stack onto the top of the stack'),
    'SSWAP': ((SPACE, LF, TAB), PARAM_NONE, 'Swap the top two items on the stack'),
    'SDISCARD': ((SPACE, LF, LF), PARAM_NONE, 'Discard the top item on the stack'),
    'SSLIDE': ((SPACE, TAB, LF), PARAM_NUM, 'Slide n items off the stack, keeping the top item'),

    # Arithmetic (IMP: [Tab][Space])
    'ADD': ((TAB, SPACE, SPACE, SPACE), PARAM_NONE, 'Addition'),
    'SUB': ((TAB, SPACE, SPACE, TAB), PARAM_NONE, 'Subtraction'),
    'MUL': ((TAB, SPACE, SPACE, LF), PARAM_NONE, 'Multiplication'),
    'DIV': ((TAB, SPACE, TAB, SPACE), PARAM_NONE, 'Integer Division'),
    'MOD': ((TAB, SPACE, TAB, TAB), PARAM_NONE, 'Modulo'),

    # Heap Access (IMP: [Tab][Tab])
    'STORE': ((TAB, TAB, SPACE), PARAM_NONE, 'Store'),
    'RETRIEVE': ((TAB, TAB, TAB), PARAM_NONE, 'Retrieve'),

    # Flow Control (IMP: [LF])
    'LABEL': ((LF, SPACE, SPACE), PARAM_LABEL, 'Mark a location in the program'),
    'CALL': ((LF, SPACE, TAB), PARAM_LABEL, 'Call a subroutine'),
    'JUMP': ((LF, SPACE, LF), PARAM_LABEL, 'Jump unconditionally to a label'),
    'JUMP-ZERO': ((LF, TAB, SPACE), PARAM_LABEL, 'Jump to a label if the top of the stack is zero'),
    'JUMP-NEG': ((LF, TAB, TAB), PARAM_LABEL, 'Jump to a label if the top of the stack is negative'),
    'RETURN': ((LF, TAB, LF), PARAM_NONE, 'End of subroutine'),
    'END': ((LF, LF, LF), 0, 'End the program'),

    # I/O (IMP: [Tab][LF])
    'OUT-CHAR': ((TAB, LF, SPACE, SPACE), PARAM_NONE, 'Output the character at the top of the stack'),
    'OUT-NUM': ((TAB, LF, SPACE, TAB), PARAM_NONE, 'Output the number at the top of the stack'),
    'IN-CHAR': ((TAB, LF, TAB, SPACE), PARAM_NONE, 'Read a character and place it in the location given by the top of the stack'),
    'IN-NUM': ((TAB, LF, TAB, TAB), PARAM_NONE, 'Read a number and place it in the location given by the top of the stack')
}

class Compiler:
    @staticmethod
    def is_same_vactor(v1, v2):
        if(len(v1) != len(v2)):
            return False
        for i in range(len(v1)):
            if v1[i] != v2[i]:
                return False
        return True

    @classmethod
    def imp_compatible(cls, imp):
        for test in IMPS:
            if cls.is_same_vactor(imp, test):
                return True
        return False

    @classmethod
    def get_command_name(cls, imp, cmd):
        tokens = imp + cmd
        for cmd_name in instructions.keys():
            if cls.is_same_vactor(instructions[cmd_name][0], tokens):
                return cmd_name
        return ''

    @staticmethod
    def need_parameter(command_name):
        if command_name == '':
            return False
        info = instructions[command_name]
        return bool(info[1] > PARAM_NONE)

    @staticmethod
    def decode(program_text):
        d = []
        for ichr in program_text:
            if ichr == ' ':
                d.append(SPACE)
            elif ichr == '\t':
                d.append(TAB)
            elif ichr == '\n':
                d.append(LF)
        return d

    @staticmethod
    def format_parameter(cmd_name, parameter):
        param_type = instructions[cmd_name][1]
        if param_type == PARAM_NUM:
            ret = 0
            for i in range(1,len(parameter)):
                if parameter[i] == TAB:
                    ret += 1
                ret <<= 1
            ret >>= 1
            if parameter[0] == TAB:
                ret *= -1
            return str(ret)
        elif param_type == PARAM_LABEL:
            ret = 0
            for x in parameter:
                if x == TAB:
                    ret += 1
                ret <<= 1
            ret >>= 1
            return str(ret)
        elif param_type == PARAM_NONE:
            return ''

    @classmethod
    def intermediate_representation(cls, decoded):
        intermediate = []
        imp = []
        command = []
        parameter = []
        imp_fetched = False
        command_fetched = False
        parameter_fetch = True
        fetch = False
        command_name = ''
        parameter_value = None
        care_x = False # fetch完了後にxをケアする必要があるかフラグ
        for x in decoded:
            # print "----"
            # print "x = %d" % x
            # print imp
            # print command
            # print parameter
            if imp_fetched == False: # requred
                if len(imp) > 0 and cls.imp_compatible(imp):
                    imp_fetched = True
                    # this x is a part of the command
                    # goto command fetch
                else:
                    imp.append(x)
                    continue
            if command_fetched == False: # requied
                command_name = cls.get_command_name(imp, command)
                if len(command) > 10:
                    break
                # print "command_name = %s" % command_name
                if len(command) > 0 and command_name != '':
                    command_fetched = True
                    if cls.need_parameter(command_name):
                        parameter_fetch = True
                        # this x is a part of the command
                        # goto parameter fetch
                    else:
                        # Note: ここの時点で x は宙ぶらりん
                        care_x = True
                        parameter_fetch = False
                        fetch = True
                        # goto fetch
                else:
                    command.append(x)
                    continue
            if parameter_fetch:
                if len(parameter) > 0 and x == LF: # ここで x 消費
                    parameter_value = cls.format_parameter(command_name, parameter)
                    parameter_fetch = False
                    fetch = True
                else:
                    parameter.append(x)
                    continue
            if fetch:
                # print "fetch done"
                # print "command_name = %s" % command_name
                # print "parameter_value = %s" % parameter_value
                if parameter_value != None:
                    intermediate.append([command_name, parameter_value])
                else:
                    intermediate.append([command_name])
                imp = []
                command = []
                parameter = []
                imp_fetched = False
                command_fetched = False
                parameter_fetch = True
                fetch = False
                parameter_value = None
                if care_x: # 宙ぶらりんな x を入れてあげる
                    imp = [x]
                care_x = False

        # for last char(this should be last of END sequence(LF))
        # print "--- last ---"
        # print imp
        # print command
        command_name = cls.get_command_name(imp, command)
        intermediate.append([command_name])

        return intermediate

class Print:
    @staticmethod
    def decode(codes):
        print codes

    @staticmethod
    def intermediate(codes):
        # print "@Print.intermediate: %s" % codes
        for code in codes:
            intext = ""
            if len(code[0]) < 8:
                intext = "\t\t".join(code)
            else:
                intext = "\t".join(code)
            intext += "\t"
            if len(code) < 2:
                if len(code[0]) < 8:
                    intext += "\t\t"
                else:
                    intext += "\t"
            intext += "\t;%s" % instructions[code[0]][2]
            print "%s" % intext

if __name__ == '__main__':

    parser = optparse.OptionParser()
    parser.add_option("-v", "--verbose",  action="store_true", default=False, help="Activate verbose mode")
    # parser.add_option("-s", "--stack",  action="store_true", default=False, help="Show the stack after each intruction execution")
    parser.add_option("-p", "--parse",  action="store_true", default=False, help="Pause the execution after each instruction")

    (opts, args) = parser.parse_args()
    if len(args) != 1:
        print "Please specify the filename of the program"
        parser.print_help()
        print
        print "Whitespace interpreter by K_atc"
        print "http://katc.sakura.ne.jp/"
        sys.exit(-1)

    # Read options
    opt_verbose = opts.verbose
    opt_parse   = opts.parse

    if opt_parse:
        opt_run = False

    f = open(args[0])
    program_text = f.read()
    f.close()

    # print "same_vector = %s" % Compiler.is_same_vactor([1,2,3], [2,2,2])
    # print "same_vector = %s" % Compiler.is_same_vactor([1,2,3], [1,2,3])

    decoded = Compiler.decode(program_text)
    # Print.decode(decoded)
    intermediate = Compiler.intermediate_representation(decoded)
    if opt_run:
        print
        # VM.run(intermediate)
    else:
        if opt_parse:
            Print.intermediate(intermediate)
	#!/usr/bin/python
	# -- coding: utf-8 --

	import optparse
	import sys

	opt_run = True
	opt_parse = False
	opt_verbose = False

	SPACE = ord(' ') # 32
	TAB = ord('\t') # 9
	LF = ord('\n') # 10

	PARAM_NONE = 0
	PARAM_NUM = 1
	PARAM_LABEL = 2

	# runtime error
	class InterpreterException(Exception):
	def __init__(cls, ip, message):
	msg = "Error in ip=%d --> %s" % ((ip, message))
	Exception.__init__(cls, msg)

	# IMP
	IMPS = [[SPACE], [TAB, SPACE], [TAB, TAB], [LF], [TAB, LF]]

	# whitespace commands (see http://compsoc.dur.ac.uk/whitespace/tutorial.php)
	instructions = {
	# Stack Manipulation (IMP: [Space])
	'PUSH': ((SPACE, SPACE), PARAM_NUM, 'Push the number onto the stack'),
	'SDUPLI': ((SPACE, LF, SPACE), PARAM_NONE, 'Duplicate the top item on the stack'),
	'SCOPY': ((SPACE, TAB, SPACE), PARAM_NUM, 'Copy the nth item on the stack onto the top of the stack'),
	'SSWAP': ((SPACE, LF, TAB), PARAM_NONE, 'Swap the top two items on the stack'),
	'SDISCARD': ((SPACE, LF, LF), PARAM_NONE, 'Discard the top item on the stack'),
	'SSLIDE': ((SPACE, TAB, LF), PARAM_NUM, 'Slide n items off the stack, keeping the top item'),

	# Arithmetic (IMP: [Tab][Space])
	'ADD': ((TAB, SPACE, SPACE, SPACE), PARAM_NONE, 'Addition'),
	'SUB': ((TAB, SPACE, SPACE, TAB), PARAM_NONE, 'Subtraction'),
	'MUL': ((TAB, SPACE, SPACE, LF), PARAM_NONE, 'Multiplication'),
	'DIV': ((TAB, SPACE, TAB, SPACE), PARAM_NONE, 'Integer Division'),
	'MOD': ((TAB, SPACE, TAB, TAB), PARAM_NONE, 'Modulo'),

	# Heap Access (IMP: [Tab][Tab])
	'STORE': ((TAB, TAB, SPACE), PARAM_NONE, 'Store'),
	'RETRIEVE': ((TAB, TAB, TAB), PARAM_NONE, 'Retrieve'),

	# Flow Control (IMP: [LF])
	'LABEL': ((LF, SPACE, SPACE), PARAM_LABEL, 'Mark a location in the program'),
	'CALL': ((LF, SPACE, TAB), PARAM_LABEL, 'Call a subroutine'),
	'JUMP': ((LF, SPACE, LF), PARAM_LABEL, 'Jump unconditionally to a label'),
	'JUMP-ZERO': ((LF, TAB, SPACE), PARAM_LABEL, 'Jump to a label if the top of the stack is zero'),
	'JUMP-NEG': ((LF, TAB, TAB), PARAM_LABEL, 'Jump to a label if the top of the stack is negative'),
	'RETURN': ((LF, TAB, LF), PARAM_NONE, 'End of subroutine'),
	'END': ((LF, LF, LF), 0, 'End the program'),

	# I/O (IMP: [Tab][LF])
	'OUT-CHAR': ((TAB, LF, SPACE, SPACE), PARAM_NONE, 'Output the character at the top of the stack'),
	'OUT-NUM': ((TAB, LF, SPACE, TAB), PARAM_NONE, 'Output the number at the top of the stack'),
	'IN-CHAR': ((TAB, LF, TAB, SPACE), PARAM_NONE, 'Read a character and place it in the location given by the top of the stack'),
	'IN-NUM': ((TAB, LF, TAB, TAB), PARAM_NONE, 'Read a number and place it in the location given by the top of the stack')
	}

	class Compiler:
	@staticmethod
	def is_same_vactor(v1, v2):
	if(len(v1) != len(v2)):
	return False
	for i in range(len(v1)):
	if v1[i] != v2[i]:
	return False
	return True

	@classmethod
	def imp_compatible(cls, imp):
	for test in IMPS:
	if cls.is_same_vactor(imp, test):
	return True
	return False

	@classmethod
	def get_command_name(cls, imp, cmd):
	tokens = imp + cmd
	for cmd_name in instructions.keys():
	if cls.is_same_vactor(instructions[cmd_name][0], tokens):
	return cmd_name
	return ''

	@staticmethod
	def need_parameter(command_name):
	if command_name == '':
	return False
	info = instructions[command_name]
	return bool(info[1] > PARAM_NONE)

	@staticmethod
	def decode(program_text):
	d = []
	for ichr in program_text:
	if ichr == ' ':
	d.append(SPACE)
	elif ichr == '\t':
	d.append(TAB)
	elif ichr == '\n':
	d.append(LF)
	return d

	@staticmethod
	def format_parameter(cmd_name, parameter):
	param_type = instructions[cmd_name][1]
	if param_type == PARAM_NUM:
	ret = 0
	for i in range(1,len(parameter)):
	if parameter[i] == TAB:
	ret += 1
	ret <<= 1
	ret >>= 1
	if parameter[0] == TAB:
	ret *= -1
	return str(ret)
	elif param_type == PARAM_LABEL:
	ret = 0
	for x in parameter:
	if x == TAB:
	ret += 1
	ret <<= 1
	ret >>= 1
	return str(ret)
	elif param_type == PARAM_NONE:
	return ''

	@classmethod
	def intermediate_representation(cls, decoded):
	intermediate = []
	imp = []
	command = []
	parameter = []
	imp_fetched = False
	command_fetched = False
	parameter_fetch = True
	fetch = False
	command_name = ''
	parameter_value = None
	care_x = False # fetch完了後にxをケアする必要があるかフラグ
	for x in decoded:
	# print "----"
	# print "x = %d" % x
	# print imp
	# print command
	# print parameter
	if imp_fetched == False: # requred
	if len(imp) > 0 and cls.imp_compatible(imp):
	imp_fetched = True
	# this x is a part of the command
	# goto command fetch
	else:
	imp.append(x)
	continue
	if command_fetched == False: # requied
	command_name = cls.get_command_name(imp, command)
	if len(command) > 10:
	break
	# print "command_name = %s" % command_name
	if len(command) > 0 and command_name != '':
	command_fetched = True
	if cls.need_parameter(command_name):
	parameter_fetch = True
	# this x is a part of the command
	# goto parameter fetch
	else:
	# Note: ここの時点で x は宙ぶらりん
	care_x = True
	parameter_fetch = False
	fetch = True
	# goto fetch
	else:
	command.append(x)
	continue
	if parameter_fetch:
	if len(parameter) > 0 and x == LF: # ここで x 消費
	parameter_value = cls.format_parameter(command_name, parameter)
	parameter_fetch = False
	fetch = True
	else:
	parameter.append(x)
	continue
	if fetch:
	# print "fetch done"
	# print "command_name = %s" % command_name
	# print "parameter_value = %s" % parameter_value
	if parameter_value != None:
	intermediate.append([command_name, parameter_value])
	else:
	intermediate.append([command_name])
	imp = []
	command = []
	parameter = []
	imp_fetched = False
	command_fetched = False
	parameter_fetch = True
	fetch = False
	parameter_value = None
	if care_x: # 宙ぶらりんな x を入れてあげる
	imp = [x]
	care_x = False

	# for last char(this should be last of END sequence(LF))
	# print "--- last ---"
	# print imp
	# print command
	command_name = cls.get_command_name(imp, command)
	intermediate.append([command_name])

	return intermediate

	class Print:
	@staticmethod
	def decode(codes):
	print codes

	@staticmethod
	def intermediate(codes):
	# print "@Print.intermediate: %s" % codes
	for code in codes:
	intext = ""
	if len(code[0]) < 8:
	intext = "\t\t".join(code)
	else:
	intext = "\t".join(code)
	intext += "\t"
	if len(code) < 2:
	if len(code[0]) < 8:
	intext += "\t\t"
	else:
	intext += "\t"
	intext += "\t;%s" % instructions[code[0]][2]
	print "%s" % intext

	if __name__ == '__main__':

	parser = optparse.OptionParser()
	parser.add_option("-v", "--verbose", action="store_true", default=False, help="Activate verbose mode")
	# parser.add_option("-s", "--stack", action="store_true", default=False, help="Show the stack after each intruction execution")
	parser.add_option("-p", "--parse", action="store_true", default=False, help="Pause the execution after each instruction")

	(opts, args) = parser.parse_args()
	if len(args) != 1:
	print "Please specify the filename of the program"
	parser.print_help()
	print
	print "Whitespace interpreter by K_atc"
	print "http://katc.sakura.ne.jp/"
	sys.exit(-1)

	# Read options
	opt_verbose = opts.verbose
	opt_parse = opts.parse

	if opt_parse:
	opt_run = False

	f = open(args[0])
	program_text = f.read()
	f.close()

	# print "same_vector = %s" % Compiler.is_same_vactor([1,2,3], [2,2,2])
	# print "same_vector = %s" % Compiler.is_same_vactor([1,2,3], [1,2,3])

	decoded = Compiler.decode(program_text)
	# Print.decode(decoded)
	intermediate = Compiler.intermediate_representation(decoded)
	if opt_run:
	print
	# VM.run(intermediate)
	else:
	if opt_parse:
	Print.intermediate(intermediate)