szczys/fbb_as_0.6dev.py Secret

## fbb_as_0.6dev.py
#!/usr/bin/env python3
import argparse
import string

__version__ = "0.6dev"
comment_delimiter = ";"     #Must be a single character or tokenizer will break
accepted_chars = string.ascii_letters+string.digits+"._"    #Characters allowed in opcodes, symbols and literals
special_delimiters = [',',':','[',']','+','-']  #Characters that make up the ASM language
modifying_keywords = ["LOW","MID","HIGH"]   #Select one 4-bit slice of a 12-bit value (see SmartToken.resolve)
modifying_operators = ["+","-"]     #Arithmetic applied between values (see SmartToken.resolve)

#Output options
show_output = True      #When False nothing is printed by parse_asm()
show_verbose = True     #Prints all optional things (spaces, comments, empty lines)
show_wordspace = False  #Put a space between the 4-bit words of each instruction
show_linenums = False   #Prefix each output line with its program memory address
show_comments = False   #Append the source comment to each output line

#Byte sequence that generate_hex() places in front of the length, program words and checksum
header = [0x00, 0xFF, 0x00, 0xFF, 0xA5, 0xC3]

#Register names accepted as operands. The index of a name in this tuple is
#the 4-bit value emitted for it by get_reg_binary(), so order matters.
named_registers = (
    "R0",
    "R1",
    "R2",
    "R3",
    "R4",
    "R5",
    "R6",
    "R7",
    "R8",
    "R9",
    "OUT",
    "IN",
    "JSR",
    "PCL",
    "PCM",
    "PCH",
    )

special_registers = (
    #Important: Don't change the index positions of C,NC,Z,NZ as they're
    #being used in generating machine code
    "C",
    "NC",
    "Z",
    "NZ",
    "PC",
)

#Symbol table (upper-case name -> value) filled in by parse_asm()
symbols = dict()

def parse_asm(lines_of_asm,hexfile_out=None):
    '''
    Assemble a string of ASM source in two passes.

    The first pass (get_tokenized_code) tokenizes every line and builds the
    symbol table. The second pass resolves symbols, dispatches each opcode to
    its encoder from the instructions table and prints and/or collects the
    resulting 12-bit words.

    lines_of_asm: the complete source as one newline-separated string
    hexfile_out:  optional path; when given, the words are written to this
                  file (binary mode) using generate_hex()

    Returns True when hexfile_out was given and the file was written.
    Returns None otherwise, including when an error was reported.
    '''
    machine_lines = 0
    word_array = []
    global symbols

    #First pass: Tokenize the input
    try:
        code_list, symbols = get_tokenized_code(lines_of_asm)
    except ParserError:
        #Error message will have already printed so bail
        return

    #Second pass: Generate output
    for i, c in enumerate(code_list):
        tokens = c.tokens

        #Blank/comment-only lines and label/variable definitions emit no
        #machine code; they are only echoed in verbose mode
        if tokens is None or tokens[0] in symbols:
            if show_verbose and show_output:
                print_output(c.source, None, None)
            continue

        try:
            #Do a substitution pass for variables in this set of tokens.
            #This lives inside the try block because resolve() raises
            #ParserError for bad operands and that must be reported with
            #the offending line rather than escaping as a traceback.
            working_tokens = list()
            for t in tokens:
                if isinstance(t, SmartToken):
                    working_tokens.append(t.resolve(symbols))
                elif t in special_delimiters:
                    continue
                else:
                    working_tokens.append(t.upper())

            opcode = working_tokens[0]
            handler = instructions.get(opcode)
            if handler is None:
                raise ParserError("E::Unknown opcode: %s" % opcode)
            if opcode == "ORG":
                #ORG needs the current address to know how many words to pad
                binary = handler(working_tokens, machine_lines)
            else:
                binary = handler(working_tokens)
        except ParserError as e:
            print_error(e, i, c.source)
            return

        if binary is None:
            print_error(ParserError("E::Uncaught syntax error"), i, c.source)
            return

        for bin_line in binary.split('\n'):
            #Special directives GOSUB, GOTO, and ORG will return multiple lines
            ln = machine_lines if (show_linenums or show_verbose) else None
            cm = c.comment if (show_comments or show_verbose) else None
            if show_output:
                print_output(bin_line,ln,cm)
            machine_lines += 1

            if hexfile_out is not None:
                word_array.append(bin_line.replace(" ",""))

    if hexfile_out is not None:
        with open(hexfile_out, "wb") as f:
            f.write(bytes(generate_hex(word_array)))
        return True

def get_tokenized_code(lines_of_asm):
    '''
    First assembler pass: tokenize every source line and build the symbol table.

    Each line is run through parse_line(). A first token that is not a known
    instruction is recorded as a symbol: either a label (value = current
    program memory address) or an EQU variable (value = resolved operand).
    Known instructions advance the address counter: GOTO/GOSUB by two,
    ORG to its argument, everything else by one.

    Returns (code_array, symbols) where code_array holds one CodePack per
    source line and symbols maps upper-case names to values.

    Every error is printed with print_error() and then signalled to the
    caller by raising an empty ParserError.
    '''
    code_array = []
    symbols = dict()

    reg_addr = 0

    for i, raw_line in enumerate(lines_of_asm.split('\n')):
        try:
            code_obj = parse_line(raw_line)
        except ParserError as e:
            print_error(e, i, raw_line)
            raise ParserError()

        code_array.append(code_obj)

        if code_obj.tokens is None:
            continue

        #Validate the opcodes/labels during first pass
        tokens = code_obj.tokens
        token = tokens[0].upper()
        try:
            if token not in instructions:
                #This must be a symbol or a variable definition
                if token in symbols:
                    raise ParserError("E::Cannot define a token that was previously defined")
                #Prewind the register number for use if this turns out to be a label
                s_value = reg_addr
                if len(tokens) > 1:
                    if not isinstance(tokens[1], str):
                        #Operands follow a word that is neither an instruction,
                        #a label (NAME:) nor a variable definition (NAME EQU value)
                        raise ParserError("E::Unknown opcode: %s" % token)
                    if tokens[1].upper() == "EQU":
                        if len(tokens) != 3:
                            raise ParserError("E::Wrong number of items in EQU statement")
                        #This is a variable definition, reset this value for the symbol table
                        s_value = tokens[2].resolve(symbols)
                #Write correct value to symbols table
                symbols[token] = s_value
            elif token == "GOSUB" or token == "GOTO":
                #These directives will add two lines of code instead of one so adjust here
                reg_addr += 2
            elif token == "ORG":
                arg = tokens[1] if len(tokens) > 1 else None
                if not isinstance(arg, SmartToken):
                    raise ParserError("E::This opcode requires a number [0..4095] as argument but got %s" % arg)
                #ORG is resolved against an empty symbol table, so only literals are accepted
                new_linenum = get_dec_or_token(arg.resolve([]))
                if type(new_linenum) != int or not 0 <= new_linenum < 4096:
                    raise ParserError("E::This opcode requires a number [0..4095] as argument but got %s" % new_linenum)
                if new_linenum <= reg_addr:
                    raise ParserError("E::The ORG opcode requires the argument (%d) be greater than the current program memory register number (%d)" % (new_linenum, reg_addr))
                reg_addr = new_linenum
            else:
                #All other instructions increment the address by one
                reg_addr += 1
        except ParserError as e:
            #Callers assume the message has already been shown
            print_error(e, i, code_obj.source)
            raise ParserError()
    return code_array,symbols

def tokenize(instring, delimiters=special_delimiters+[comment_delimiter]):
    '''
    Break a line of ASM into tokens.

    The line is first split on whitespace, then every piece is split again
    on each delimiter in turn. Delimiters are kept as tokens of their own;
    whitespace and empty fragments are dropped.
    '''
    pieces = instring.split()
    for delim in delimiters:
        split_pieces = []
        for piece in pieces:
            fragments = piece.split(delim)
            last = len(fragments) - 1
            for pos, fragment in enumerate(fragments):
                #A fragment is empty when the delimiter begins or ends the
                #piece that was split, so skip those
                if fragment:
                    split_pieces.append(fragment)
                #Re-insert the delimiter between (not after) fragments
                if pos < last:
                    split_pieces.append(delim)
        pieces = split_pieces
    return pieces

def parse_line(instring):
    '''
    Performs the work of tokenizing a single line of code,
    ensuring that the syntax is valid (although this does
    not mean the combo of instructions and operands is valid.)

    The raw tokens from tokenize() are walked through a small state machine
    (see TokenizerStates). Operands are collected into SmartToken streams so
    symbols and arithmetic can be resolved in the second pass.

    Returns CodePack() object. Its tokens attribute stays None for blank or
    comment-only lines.
    Raises ParserError on any syntax error.
    '''
    raw_tokens = tokenize(instring)
    parsed_tokens = list()
    code = CodePack(source=instring)
    s = TokenizerStates()

    #Preserve full comment
    if comment_delimiter in instring:
        d_idx = instring.index(comment_delimiter)
        code.comment = instring[d_idx:]

    #Custom type to contain Token streams. Will be used for symbol substition and math in second pass
    token_stream = SmartToken()

    #Walk through the flowchart
    for e in raw_tokens:
        e = e.upper()
        if s.cur_state==s.OPCODE:
            if all(c in accepted_chars for c in e):
                parsed_tokens.append(e)
                s.cur_state = s.WATCH_TOKEN_SET
                continue
            elif e == comment_delimiter:
                #Comment already preserved, nothing left to parse
                break
            else:
                raise ParserError("E::Syntax error: Invalid characters found in: %s" % e)

        elif s.cur_state==s.WATCH_COMMENT:
            #The only trigger character valid from here on out is comment_delimiter
            if e != comment_delimiter:
                raise ParserError("E::Expected comment (%s) or end of line but got %s" % (comment_delimiter, e))
            else:
                #Comment already preserved, nothing left to do
                break

        elif s.cur_state==s.WATCH_COMMA_COMMENT:
            if e==",":
                #Should be another token or set coming
                s.cur_state = s.WATCH_TOKEN_SET
                continue
            elif e==comment_delimiter:
                #Comment already preserved, nothing left to do
                break
            else:
                raise ParserError("E::Expected comma (,) or comment (%s) but got %s" % (comment_delimiter, e))

        elif s.cur_state in [s.WATCH_TOKEN_SET,
                s.TOKEN_COLON_BRACKET,
                s.TOKEN_BRACKET,
                s.TOKEN_COMMA_COMMENT,
                s.TOKEN_COMMENT,
                s.TOKEN_VAR_DEF]:

            #Need to know what came before. This is tricky because sometimes we will already
            #be filling a token stream and other times we'll just be starting one
            if len(token_stream) != 0:
                previous = token_stream[-1]
            else:
                previous = parsed_tokens[-1]

            if type(previous) == int:
                prev_isvalid = True
            else:
                #Raised error tells us this is not a valid token but has special characters in it
                try:
                    validate_token(previous)
                    prev_isvalid = True
                except ParserError:
                    prev_isvalid = False

            if e=="EQU":
                if s.cur_state == s.WATCH_TOKEN_SET and len(parsed_tokens)==1 and len(token_stream)==0:
                    #Found EQU in the right place
                    parsed_tokens.append(e)
                    s.cur_state = s.TOKEN_VAR_DEF
                    continue
                raise ParserError("E::Unexpected EQU after %s" % str(previous))
            elif e=='[':
                if s.cur_state == s.WATCH_TOKEN_SET:
                    #Found [ in right place
                    token_stream.append(e)
                    s.cur_state = s.TOKEN_COLON_BRACKET
                    continue
                raise ParserError("E::Unexpected opening bracket ([) after %s" % str(previous))
            elif e==':':
                if s.cur_state == s.TOKEN_COLON_BRACKET and prev_isvalid:
                    #Found : in right place
                    token_stream.append(e)
                    s.cur_state = s.TOKEN_BRACKET
                    continue
                elif len(parsed_tokens)==1 and prev_isvalid:
                    #This is a label assignment; Nothing should come after this but a comment
                    #This colon is not inside of brackets so it isn't part of a token_stream; it's part of parsed_tokens
                    parsed_tokens.append(e)
                    s.cur_state = s.WATCH_COMMENT
                    continue
                raise ParserError("E::Unexpected colon (:) after %s" % str(previous))
            elif e=="]":
                if s.cur_state in [s.TOKEN_BRACKET, s.TOKEN_COLON_BRACKET] and prev_isvalid:
                    #Found ] in right place
                    token_stream.append(e)
                    parsed_tokens.append(token_stream)
                    token_stream = SmartToken()
                    s.cur_state = s.WATCH_COMMA_COMMENT
                    continue
                raise ParserError("E::Unexpected closing bracket (]) after %s" % str(previous))
            elif e==",":
                if s.cur_state == s.TOKEN_COMMA_COMMENT and prev_isvalid:
                    #Found , in right place
                    if len(token_stream) != 0:
                        parsed_tokens.append(token_stream)
                        token_stream = SmartToken()
                    parsed_tokens.append(e)
                    s.cur_state = s.WATCH_TOKEN_SET
                    continue
                raise ParserError("E::Unexpected comma (,) after %s" % str(previous))
            elif e==comment_delimiter:
                if s.cur_state in [s.TOKEN_COMMA_COMMENT, s.TOKEN_COMMENT, s.TOKEN_VAR_DEF] and prev_isvalid:
                    #Found ; in right place, no need to parse more.
                    #Any open token_stream is flushed (and EQU is checked) after the loop.
                    break
                raise ParserError("E::Unexpected opening semicolon (;) after %s" % str(previous))

            elif e in modifying_keywords:
                if previous in token_preceders:
                    #Valid HIGH/LOW modifier
                    token_stream.append(e)
                    continue
                raise ParserError("E::Unexpected modifier %s after %s" % (e,str(previous)))
            elif e in modifying_operators:
                if previous in token_preceders:
                    #Hack: add 0 before leading operator (like -/+)
                    token_stream.append(0)
                    token_stream.append(e)
                    continue
                elif prev_isvalid:
                    #Operators can follow valid tokens
                    token_stream.append(e)
                    continue
                raise ParserError("E::Unexpected operator %s after %s" % (e,str(previous)))
            else:

                #Should have taken care of all modifiers and dividers
                #This will raise an error if it is not a valid token being added
                token_stream.append(validate_token(e))
                if s.cur_state == s.WATCH_TOKEN_SET:
                    #We got the token we were watching for so now look for more of this token, or a comma or comment
                    s.cur_state = s.TOKEN_COMMA_COMMENT
                continue
        else:
            raise ParserError("E::Unknown parser state machine cur_state value: %s" % s.cur_state)

    #Catch any token_streams that weren't written
    if len(token_stream) != 0:
        parsed_tokens.append(token_stream)
    #A variable definition is exactly NAME EQU VALUE, with or without a trailing comment
    if s.cur_state == s.TOKEN_VAR_DEF and len(parsed_tokens) != 3:
        raise ParserError("E::Wrong number of items in EQU statement")
    if len(parsed_tokens) != 0:
        code.tokens = parsed_tokens
    return code

def validate_token(e):
    '''
    Validates the token part of the token stream (letters, numbers, underscore, period).

    Returns the token unchanged when every character is in accepted_chars.
    Raises ParserError if special characters like +,-,[,] are found.
    '''
    if all(c in accepted_chars for c in e):
        return e
    #Interpolate the token into the message (a comma here would pass it as
    #a second exception argument and print a tuple instead)
    raise ParserError("E::Illegal characters in token: %s" % e)

def format_unexpected_char_error(element,previous):
    '''Build the "Unexpected X after Y" syntax error message text.'''
    message = "E::Syntax error: Unexpected %s after %s" % (element, previous)
    return message

class TokenizerStates:
    '''
    State names (as small integers) plus the current state used by the
    parse_line() state machine. A new instance starts in the OPCODE state.
    '''
    def __init__(self):
        #Number the states 0..9 in declaration order
        (self.OPCODE,
         self.TOKEN_COMMA_COMMENT,
         self.TOKEN_COMMENT,
         self.TOKEN_COLON_BRACKET,
         self.TOKEN_BRACKET,
         self.BRACKETS,
         self.WATCH_TOKEN_SET,
         self.WATCH_COMMA_COMMENT,
         self.WATCH_COMMENT,
         self.TOKEN_VAR_DEF) = range(10)
        self.cur_state = self.OPCODE
        self.reset_buffers()

    def reset_buffers(self):
        '''Start over with an empty bracket token buffer.'''
        self.bracket_token_buffer = list()

class SmartToken(list):
    '''
    A list of raw operand tokens (names, numbers, modifiers, operators and
    bracket characters) collected by parse_line() for one operand.

    resolve() turns the stream into a final value during the second pass.
    '''
    def __init__(self, data=None):
        #Populate the list itself so that SmartToken(data) really contains data
        super().__init__(data if data is not None else ())

    def resolve(self, symbols):
        '''
        Reduce this token stream to a value.

        symbols: container of known symbols; names found in it are replaced
                 by their value, everything else goes through get_dec_or_token()

        Returns a list of values when the stream is a bracketed set
        ([A:B] or [A]), otherwise a single value (int, or str for register
        names and unresolved symbols).

        Raises ParserError when a modifier or operator is applied to
        something that is not a number, or when two values appear with no
        operator between them. Raises Exception for states that
        parse_line() should never produce.
        '''
        is_set = False
        found_named_reg = False
        prefix = None
        working_token = None
        resolved_set = None
        for i, e in enumerate(self):
            try:
                e = e.upper()
            except AttributeError:
                #Integers (like the 0 inserted before a leading sign) have no upper()
                pass
            if e in named_registers+special_registers:
                found_named_reg = True

            if e=='[':
                if i==0:
                    is_set = True
                    resolved_set = list()
                    continue
                else:
                    raise Exception("Unexpected opening bracket when parsing smart token. This should never happen")
            elif e==":":
                #Colon closes the first element of a set
                resolved_set.append(working_token)
                found_named_reg = False
                prefix = None
                working_token = None
                continue
            elif e=="]":
                if i==len(self)-1:
                    resolved_set.append(working_token)
                    found_named_reg = False
                    prefix = None
                    working_token = None
                    continue
                else:
                    raise Exception("Unexpected closing bracket when parsing smart token. This should never happen")
            elif e in modifying_keywords+modifying_operators:
                prefix = e
                continue

            #Everything that's not a symbol or a token has been filtered out by now
            if e in symbols:
                filtered_t = symbols[e]
            else:
                filtered_t = get_dec_or_token(e)

            if prefix is not None:
                if found_named_reg:
                    #If a named register is already in the working_token this will already be set to True
                    raise ParserError("E::Syntax error: Modifications like (%s) may only be performed on numbers but a named register was found." % prefix)
                if type(filtered_t) != int:
                    raise ParserError("E::Syntax error: Modifications like (%s) may only be performed on numbers but %s was found." % (prefix, filtered_t))
                if prefix=="LOW":
                    #Bits 0..3 of the 12-bit value
                    working_token = filtered_t & 0xF
                elif prefix=="MID":
                    #Bits 4..7
                    working_token = (filtered_t >> 4) & 0xF
                elif prefix=="HIGH":
                    #Bits 8..11
                    working_token = (filtered_t >> 8) & 0xF
                else:
                    #"+" or "-": the left-hand side must already be a number
                    if working_token is None:
                        working_token = 0
                    if type(working_token) != int:
                        raise ParserError("E::Syntax error: Modifications like (%s) may only be performed on numbers but %s was found." % (prefix, working_token))
                    if prefix=="+":
                        working_token += filtered_t
                    else:
                        working_token -= filtered_t
                #Each modifier/operator applies to exactly one value
                prefix = None
            elif working_token is None:
                working_token = filtered_t
            else:
                raise ParserError("E::Syntax error: Expected an operator or comma between %s and %s" % (working_token, filtered_t))

        if is_set:
            if resolved_set is not None:
                return resolved_set
        else:
            if working_token is not None:
                return working_token

        raise Exception("Error, SmartToken.resolve() was unable to finish and didn't raise ParserError(). This should never happen.")

def get_dec_or_token(token):
    '''
    Convert a numeric literal to an int, leaving anything else untouched.

    token: a string such as "12", "0x1F" or "0b101" (prefix is
           case-insensitive), or an already-resolved value

    Returns the integer value when token is a decimal, hex or binary literal.
    Returns token unchanged when it is not a string (ints, resolved bracket
    sets) or when it does not parse as a number (register names, symbols).
    '''
    if not isinstance(token, str):
        #Already resolved; nothing to parse
        return token

    base = 10
    if len(token) > 2:
        prefix = token[:2].lower()
        if prefix == "0x":
            base = 16
        elif prefix == "0b":
            base = 2

    try:
        #Try to return it as a number
        return int(token, base)
    except ValueError:
        #Otherwise it must be a token
        return token

def checksum(hexarray):
    '''
    Returns 16-bit checksum

    Param: hexarray is a sequence of byte values taken in pairs, each pair
    arranged with low byte first, high second. A trailing unpaired value
    is ignored.

    Return: checksum as a two element list, low byte first, high second.
    The running sum is reduced modulo 0xFFFF after every pair.
    '''
    total = 0
    #Pulling twice from the same iterator yields consecutive (low, high) pairs
    byte_iter = iter(hexarray)
    for low, high in zip(byte_iter, byte_iter):
        total = (total + low + (high*256)) % 0xFFFF
    byte_h, byte_l = divmod(total, 256)
    return [byte_l, byte_h]

def generate_hex(program_list, h=header):
    '''
    Build the byte list for the output file.

    program_list: list of 12-character binary strings, one per program word
    h:            list of header bytes placed at the very front

    Layout: header, program length (two bytes), every program word (two
    bytes each, low byte first), then the checksum() of everything after
    the header.
    '''
    payload = twelve_bit_to_hex_bytes(format(len(program_list),"012b"))
    for word in program_list:
        payload.extend(twelve_bit_to_hex_bytes(word))
    #The checksum covers the length and program bytes but not the header
    payload.extend(checksum(payload))
    return h + payload


def twelve_bit_to_hex_bytes(twelvebit):
    '''
    Takes a string that is a 12-bit binary number.
    Returns a two element list of ints: the low eight bits first, then the
    top four bits.
    '''
    high_nibble = twelvebit[:4]
    low_byte = twelvebit[-8:]
    return [int(low_byte, 2), int(high_nibble, 2)]

def args_rxry(tokens,opcode):
    '''
    Encode an instruction of the form OP RX,RY (two named registers).
    Used by ADD ADC SUB SBB OR AND XOR MOV.

    Returns the formatted word: opcode, RX code, RY code.
    Raises ParserError on a wrong argument count or a non-register operand.
    '''
    arg_count_test(len(tokens),3)
    rx, ry = tokens[1], tokens[2]
    if rx not in named_registers or ry not in named_registers:
        raise ParserError("E::This opcode requires register names as arguments")
    return format_output(opcode, get_reg_binary(rx), get_reg_binary(ry))

def args_r0n(tokens,opcode):
    '''
    Encode an instruction of the form OP R0,N (R0 and a 4-bit literal).
    Used by CP ADD OR AND XOR RET.

    Returns the formatted word: 0000, opcode, N.
    Raises ParserError on a wrong argument count, a first argument other
    than R0, or a second argument that is not a number in range.
    '''
    arg_count_test(len(tokens),3)
    register, literal = tokens[1], tokens[2]
    if register != "R0":
        raise ParserError("E::This opcode requires R0 as the first argument")
    if not is_int(literal):
        raise ParserError("E::This opcode requires a number as the second argument")
    return format_output("0000", opcode, get_four_bit_binary(literal))

def args_ry(tokens,opcode):
    '''
    Encode an instruction of the form OP RY (one named register).
    Used by INC DEC DSZ RRC.

    Returns the formatted word: 0000, opcode, RY code.
    Raises ParserError on a wrong argument count or a non-register operand.
    '''
    arg_count_test(len(tokens),2)
    register = tokens[1]
    if register in named_registers:
        return format_output("0000", opcode, get_reg_binary(register))
    raise ParserError("E::This opcode requires a register name as the argument")

def args_rgm(tokens,opcode):
    '''
    Encode an instruction of the form OP RG,M where RG is R0..R3 and M is 0..3.
    Used by BIT BSET BCLR BTG.

    Returns the formatted word: 0000, opcode, then two bits of register
    code followed by two bits of M.
    Raises ParserError on a wrong argument count or an operand out of range.
    '''
    arg_count_test(len(tokens),3)
    register, bit_num = tokens[1], tokens[2]
    if register not in ["R0","R1","R2","R3"]:
        raise ParserError("E::This opcode requires R0, R1, R2, or R3 as the first argument")
    if not (is_int(bit_num) and 0 <= bit_num < 4):
        raise ParserError("E::This opcode requires a number [0..3] as the second argument")
    #Both values fit in two bits, so drop the two leading zeros of each
    reg_bits = get_reg_binary(register)[2:]
    num_bits = get_four_bit_binary(bit_num)[2:]
    return format_output("0000", opcode, reg_bits+num_bits)

def args_go(tokens,trigger_reg):
    '''
    Encode the two-word sequence shared by GOTO and GOSUB.

    tokens:      [opcode, address] where address must have resolved to an int
    trigger_reg: 4-bit binary string of the register written by the second word

    Returns two formatted words separated by a newline: the first carries
    the high and middle nibbles of the address, the second carries
    trigger_reg and the low nibble.
    Raises ParserError on a wrong argument count, an address that did not
    resolve to a number (e.g. an undefined label), or one outside 0..4095.
    '''
    arg_count_test(len(tokens),2)
    reg_value = tokens[1]
    if not is_int(reg_value):
        #Unresolved labels arrive here as strings; comparing them with
        #ints below would raise TypeError instead of a readable error
        raise ParserError("E::This opcode requires a number or a defined label as the argument but got %s" % (reg_value,))
    if not 0 <= reg_value < 4096:
        raise ParserError("E::Register value is out of range (0 <= reg_value < 4096): %d" % reg_value)
    word_h, word_m, word_l = pc_addr_to_bin(reg_value)
    two_lines = format_output("1110", word_h, word_m) + "\n" + format_output("1001",trigger_reg,word_l)
    return two_lines

def opcode_add(tokens):
    '''
    Encode ADD in either of its forms: ADD R0,N or ADD RX,RY.

    Raises ParserError on a wrong argument count or invalid operands.
    '''
    #Check the count first so a missing operand is reported as a
    #ParserError rather than an IndexError on tokens[2]
    arg_count_test(len(tokens),3)
    #ADD R0,N
    if is_int(tokens[2]):
        return args_r0n(tokens,"0001")
    #ADD RX,RY
    return args_rxry(tokens,"0001")

def opcode_adc(tokens):
    '''Encode ADC RX,RY (opcode word 0010).'''
    opcode_word = "0010"
    return args_rxry(tokens, opcode_word)

def opcode_sub(tokens):
    '''Encode SUB RX,RY (opcode word 0011).'''
    opcode_word = "0011"
    return args_rxry(tokens, opcode_word)

def opcode_sbb(tokens):
    '''Encode SBB RX,RY (opcode word 0100).'''
    opcode_word = "0100"
    return args_rxry(tokens, opcode_word)

def opcode_or(tokens):
    '''
    Encode OR in either of its forms: OR R0,N or OR RX,RY.

    Raises ParserError on a wrong argument count or invalid operands.
    '''
    #Check the count first so a missing operand is reported as a
    #ParserError rather than an IndexError on tokens[2]
    arg_count_test(len(tokens),3)
    #OR R0,N
    if is_int(tokens[2]):
        return args_r0n(tokens,"0101")
    #OR RX,RY
    return args_rxry(tokens,"0101")

def opcode_and(tokens):
    '''
    Encode AND in either of its forms: AND R0,N or AND RX,RY.

    Raises ParserError on a wrong argument count or invalid operands.
    '''
    #Check the count first so a missing operand is reported as a
    #ParserError rather than an IndexError on tokens[2]
    arg_count_test(len(tokens),3)
    #AND R0,N
    if is_int(tokens[2]):
        return args_r0n(tokens,"0110")
    #AND RX,RY
    return args_rxry(tokens,"0110")

def opcode_xor(tokens):
    '''
    Encode XOR in either of its forms: XOR R0,N or XOR RX,RY.

    Raises ParserError on a wrong argument count or invalid operands.
    '''
    #Check the count first so a missing operand is reported as a
    #ParserError rather than an IndexError on tokens[2]
    arg_count_test(len(tokens),3)
    #XOR R0,N
    if is_int(tokens[2]):
        return args_r0n(tokens,"0111")
    #XOR RX,RY
    return args_rxry(tokens,"0111")

def opcode_mov(tokens):
    '''
    Encode every form of MOV:

        MOV PC,[NN]     MOV R0,[NN]     MOV [NN],R0
        MOV R0,[X:Y]    MOV [X:Y],R0    MOV RX,RY     MOV RX,N

    A bracketed operand arrives as a list (see SmartToken.resolve), either
    of numbers or of two register names.

    Returns the formatted 12-bit word.
    Raises ParserError on a wrong argument count or any operand combination
    that does not match one of the forms above.
    '''
    arg_count_test(len(tokens),3)
    dest, src = tokens[1], tokens[2]
    if any(isinstance(i,list) for i in tokens):
        #Must be instruction containing a set of brackets
        if dest == "PC":
            #MOV PC,NN
            if all(is_int(i) for i in src):
                word_high, word_low = get_eight_bit_binary(src)
                return format_output("1110", word_high, word_low)
            raise ParserError("E::Expected literal number for MOV PC, NN")
        elif dest == "R0":
            if all(is_int(i) for i in src):
                #MOV R0,[NN]
                word_high, word_low = get_eight_bit_binary(src)
                return format_output("1101", word_high, word_low)
            elif len(src) == 2 and all(i in named_registers for i in src):
                #MOV R0,[XY]
                return format_output("1011", get_reg_binary(src[0]), get_reg_binary(src[1]))
            raise ParserError("E::Type mismatch for values inside brackets")
        elif src == "R0":
            if all(is_int(i) for i in dest):
                #MOV [NN],R0
                word_high, word_low = get_eight_bit_binary(dest)
                return format_output("1100", word_high, word_low)
            elif len(dest) == 2 and all(i in named_registers for i in dest):
                #MOV [XY],R0
                return format_output("1010", get_reg_binary(dest[0]),get_reg_binary(dest[1]))
            raise ParserError("E::Type mismatch for values inside brackets")
        #Brackets are only valid together with R0 or PC
        raise ParserError("E::MOV with brackets requires R0 or PC as the other argument")
    #Catch edge case syntax error
    elif dest == "PC":
        raise ParserError("E::Syntax error, numeric literal must be in brackets for PC,[NN]")
    #MOV RX,RY
    elif dest in named_registers and src in named_registers:
        return args_rxry(tokens,"1000")
    #MOV RX,N
    elif is_int(src):
        if dest in named_registers:
            return format_output("1001", get_reg_binary(dest), get_four_bit_binary(src))
        raise ParserError("E::Expected register name for first argument of MOV RX,N")
    #Anything else (e.g. an undefined symbol) is reported rather than
    #crashing with a bare Exception
    raise ParserError("E::Unrecognized arguments for MOV: %s, %s" % (dest, src))

def opcode_jr(tokens):
    '''
    Encode JR with a bracketed operand: either one signed number
    [-128..127] or two 4-bit numbers.

    Returns the formatted word: 1111, high nibble, low nibble.
    Raises ParserError on a wrong argument count or when the operand is not
    a non-empty bracketed set of numbers.
    '''
    arg_count_test(len(tokens),2)
    offset = tokens[1]
    #Require a list before iterating: a bare number would otherwise raise
    #TypeError ('int' object is not iterable) instead of a readable error
    if isinstance(offset, list) and offset and all(is_int(i) for i in offset):
        word_high, word_low = get_eight_bit_binary(offset, signed=True)
        return format_output("1111", word_high, word_low)
    raise ParserError("E::This opcode requires two numbers as arguments")

def opcode_cp(tokens):
    '''Encode CP R0,N (second word 0000).'''
    opcode_word = "0000"
    return args_r0n(tokens, opcode_word)

def opcode_inc(tokens):
    '''Encode INC RY (second word 0010).'''
    opcode_word = "0010"
    return args_ry(tokens, opcode_word)

def opcode_dec(tokens):
    '''Encode DEC RY (second word 0011).'''
    opcode_word = "0011"
    return args_ry(tokens, opcode_word)

def opcode_dsz(tokens):
    '''Encode DSZ RY (second word 0100).'''
    opcode_word = "0100"
    return args_ry(tokens, opcode_word)

def opcode_exr(tokens):
    '''
    Encode EXR N where N is a 4-bit literal.

    Returns the formatted word: 0000, 1000, N.
    Raises ParserError on a wrong argument count or a non-numeric argument.
    '''
    arg_count_test(len(tokens),2)
    count = tokens[1]
    if not is_int(count):
        raise ParserError("E::This opcode requires a number as the argument")
    return format_output("0000", "1000", get_four_bit_binary(count))

def opcode_bit(tokens):
    '''Encode BIT RG,M (second word 1001).'''
    opcode_word = "1001"
    return args_rgm(tokens, opcode_word)

def opcode_bset(tokens):
    '''Encode BSET RG,M (second word 1010).'''
    opcode_word = "1010"
    return args_rgm(tokens, opcode_word)

def opcode_bclr(tokens):
    '''Encode BCLR RG,M (second word 1011).'''
    opcode_word = "1011"
    return args_rgm(tokens, opcode_word)

def opcode_btg(tokens):
    '''Encode BTG RG,M (second word 1100).'''
    opcode_word = "1100"
    return args_rgm(tokens, opcode_word)

def opcode_rrc(tokens):
    '''Encode RRC RY (second word 1101).'''
    opcode_word = "1101"
    return args_ry(tokens, opcode_word)

def opcode_ret(tokens):
    '''Encode RET R0,N (second word 1110).'''
    opcode_word = "1110"
    return args_r0n(tokens, opcode_word)

def opcode_skip(tokens):
    '''
    Encode SKIP F,M where F is one of C, NC, Z, NZ and M is 0..3.

    Returns the formatted word: 0000, 1111, then two bits for F (its index
    in special_registers) followed by two bits of M.
    Raises ParserError on a wrong argument count or an operand out of range.
    '''
    arg_count_test(len(tokens),3)
    flags = special_registers[:4]
    flag, count = tokens[1], tokens[2]
    if flag not in flags:
        raise ParserError("E::This opcode requires %s, %s, %s, or %s as the first argument" % flags)
    if not (is_int(count) and 0 <= count < 4):
        raise ParserError("E::This opcode requires a number [0..3] as the second argument")
    flag_bits = format(special_registers.index(flag),"02b")
    #M fits in two bits, so drop the two leading zeros
    count_bits = get_four_bit_binary(count)[2:]
    return format_output("0000", "1111", flag_bits + count_bits)

def opcode_goto(tokens):
    '''Encode GOTO as the two-word args_go() sequence that writes PCL.'''
    trigger = get_reg_binary("PCL")
    return args_go(tokens, trigger)

def opcode_gosub(tokens):
    '''Encode GOSUB as the two-word args_go() sequence that writes JSR.'''
    trigger = get_reg_binary("JSR")
    return args_go(tokens, trigger)

def opcode_org(tokens,linenum):
    '''
    Pad program memory with all-zero words from linenum up to the ORG address.

    tokens:  [opcode, target address]
    linenum: current program memory address

    Returns one "0000 0000 0000" line per address to fill, joined by newlines.
    '''
    arg_count_test(len(tokens),2)
    #This should have been validated as a number within range and greater than
    #current program memory register when the symbols table was calculated
    #in get_tokenized_code()
    filler_count = tokens[1] - linenum
    return "\n".join(["0000 0000 0000"] * filler_count)

def is_int(val):
    '''Return True only when val is exactly an int (subclasses such as bool do not count).'''
    return type(val) is int

def get_four_bit_binary(value):
    '''
    Convert int to a 4-character binary string.
    Raises ParserError if value is outside [0..15].
    '''
    if 0 <= value < 16:
        return format(value, "04b")
    raise ParserError("E::Literal value out of range. Expected [0..15]")

def get_eight_bit_binary(brackets, signed=False):
    '''
    Convert the contents of a bracketed operand to two 4-bit binary strings.

    brackets: list holding either one number (a full byte) or two numbers
              (high nibble, low nibble)
    signed:   when True a single number is range checked as [-128..127]
              and encoded as two's complement; otherwise as [0..255]

    Returns (high nibble, low nibble) as 4-character binary strings.
    Raises ParserError if a value is out of range.
    '''
    if len(brackets) == 1:
        value = brackets[0]
        if signed:
            #Inclusive upper bound: 127 is a valid signed byte
            if not -128 <= value <= 127:
                raise ParserError("E::Literal value out of range. Expected [-128..127]")
        elif not 0 <= value < 256:
            raise ParserError("E::Literal value out of range. Expected [0..255]")
        #Masking to eight bits gives the two's complement form of negatives
        bin_byte = format(value & 0xFF,"08b")
        return bin_byte[:4], bin_byte[4:]

    if any(not 0 <= i < 16 for i in brackets):
        raise ParserError("E::Literal value out of range. Expected [0..15]")
    return format(brackets[0], "04b"), format(brackets[1], "04b")


def get_reg_binary(reg_name):
    '''Return the 4-bit binary string for a named register (its index in named_registers).'''
    reg_index = named_registers.index(reg_name)
    return format(reg_index, "04b")

def arg_count_test(actual, expected):
    '''
    Raise ParserError unless the token count matches.

    Both counts include the opcode itself, so the message reports one less
    than each to show the number of arguments.
    '''
    if actual == expected:
        return
    raise ParserError("E::Expected %d arguments for this opcode but got %d" % (expected-1, actual-1))

def format_output(*arg):
    '''
    Join the given words into one output string.

    Argument list must all be strings. Words are separated by a single
    space when show_wordspace or show_verbose is set, otherwise they are
    run together.
    '''
    separator = " " if (show_wordspace or show_verbose) else ""
    return separator.join("%s" % (word,) for word in arg)

def pc_addr_to_bin(pc_num):
    '''Split a program address into (high, middle, low) 4-bit binary strings.'''
    bits = format(pc_num, "012b")
    high, middle, low = bits[:4], bits[4:8], bits[8:]
    return (high, middle, low)

def print_output(binary,ln,cm):
    '''
    Print one line of assembler output.

    binary: the text to show (machine code or an echoed source line)
    ln:     line number shown in front, or None to leave it out
    cm:     comment shown at the end, or None to leave it out

    The parts are separated by tabs.
    '''
    parts = []
    if ln is not None:
        parts.append(format(ln, " 4") + '\t')
    parts.append(binary)
    if cm is not None:
        parts.append('\t' + cm)
    print("".join(parts))

def print_error(e, line_num, line):
    '''Print an error message followed by the number and text of the source line that caused it.'''
    report = "%s\n\tLine %d:\t%s" % (e, line_num, line)
    print(report)

def read_asm_file(filename):
    '''Return the entire contents of the text file at filename as one string.'''
    with open(filename, 'r') as asm_handle:
        return asm_handle.read()

class CodePack:
    '''
    Everything known about one line of source.

    tokens:  list of parsed tokens, or None when the line holds no code
    comment: the comment text including its delimiter, or None
    source:  the original line exactly as it was read
    '''
    def __init__(self, tokens=None, comment=None, source=None):
        self.tokens, self.comment, self.source = tokens, comment, source

class ParserError(Exception):
    '''Raised for any syntax or operand error found while assembling.'''

#Dispatch table: upper-case mnemonic -> function that encodes it.
#parse_asm() looks up the first token of each line here, and
#get_tokenized_code() treats any first token NOT in this table as a label
#or variable name.
#NOTE(review): the trailing numeric comments look like instruction reference
#numbers from an external document -- confirm
instructions = {
    "ADD": opcode_add, # 17
    "ADC": opcode_adc,
    "SUB": opcode_sub,
    "SBB": opcode_sbb,
    "OR":  opcode_or,  #21
    "AND": opcode_and, #22
    "XOR": opcode_xor, #23
    "MOV": opcode_mov, #9,10,11,12,13,14
    "JR":  opcode_jr,
    "CP":  opcode_cp,
    "INC": opcode_inc,
    "DEC": opcode_dec,
    "DSZ": opcode_dsz,
    "EXR": opcode_exr,
    "BIT": opcode_bit,
    "BSET": opcode_bset,
    "BCLR": opcode_bclr,
    "BTG": opcode_btg,
    "RRC": opcode_rrc,
    "RET": opcode_ret,
    "SKIP": opcode_skip,
    "GOTO": opcode_goto,
    "GOSUB": opcode_gosub,
    "ORG": opcode_org,
    }
#Tokens after which parse_line() accepts a LOW/MID/HIGH modifier or a
#leading +/- sign: every mnemonic plus EQU and the opening delimiters
token_preceders = [*instructions]+["EQU","[",":",","]

def main():
    """Command-line entry point for the assembler.

    Parses the command-line options, updates the module-level output flags
    to match, derives the ".hex" output filename from the input filename and
    runs parse_asm() on the contents of the source file. A success message
    is printed when parse_asm() returns True.
    """
    import os   #Local import: only needed here to split off the extension

    print("Four-Bit-Badge Assembler version %s\n" % __version__)

    parser = argparse.ArgumentParser()
    parser.add_argument("asmfile", help="assembly language file to be processed")
    parser.add_argument("-q", help="Write to file without showing any human-readable output", action="store_true")
    parser.add_argument("-c", help="enable comments in readout", action="store_true")
    parser.add_argument("-n", help="enable line numbers in readout", action="store_true")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-s", help="Show 12-bit instructions with spaces between words", action="store_true")
    group.add_argument("-w", help="Show 12-bit instructions without spaces between words", action="store_true")
    args = parser.parse_args()

    global show_output
    global show_verbose
    global show_wordspace
    global show_linenums
    global show_comments

    #Any explicit formatting option switches verbose mode off so that only
    #the requested parts are shown
    if args.q:
        show_output = False
    if args.c:
        show_verbose = False
        show_comments = True
    if args.n:
        show_verbose = False
        show_linenums = True
    if args.s:
        show_verbose = False
        show_wordspace = True
    if args.w:
        show_verbose = False
        show_wordspace = False

    #Replace the extension (if any) of the final path component with ".hex".
    #splitext only inspects the last component, so a dot in a directory name
    #(e.g. "../src/prog") is no longer mistaken for the start of an extension
    outfile = os.path.splitext(args.asmfile)[0] + ".hex"

    if parse_asm(read_asm_file(args.asmfile),hexfile_out=outfile) == True:
        print("\nSuccessfully wrote hex file: %s\n" % outfile)

#Run the assembler only when this file is executed as a script, not on import
if __name__ == "__main__":
    main()

## fbb_dis_0.6dev.py
#!/usr/bin/env python3
import argparse
from fbb_as import checksum

__version__ = "0.6dev"
#Byte sequence every binary image must begin with; is_valid() compares the
#start of the image against it
header = [0x00, 0xFF, 0x00, 0xFF, 0xA5, 0xC3]

#Output options: module-level flags that main() adjusts from the command line
show_output = True      #Passed to disassemble() as print_output
show_verbose = True     #Enables every column of the listing (line numbers, spaced words, source)
show_words = False      #Include the three 4-bit words in each listing line
show_wordspace = True   #Separate the three words with spaces
show_linenums = False   #Prefix each listing line with its line number
show_source = False     #Include the decoded assembly text

def is_valid(hexarray, h=header):
    """Check that a binary image starts with the header and ends with a matching checksum.

    Param: hexarray is a list of byte values. There must be an even number
        of elements, each pair arranged with low byte first, high second.
        It must begin with the header values and end with a 16-bit checksum
        stored as a low-byte, high-byte pair.
    Param: h is the list of header bytes the image must start with.

    Returns True when the last two bytes equal the value checksum() gives
    for everything between the header and those two bytes, else False.

    Raises ValueError (a subclass of Exception, so existing handlers still
    catch it) when the length is odd or the header does not match.
    """
    header_len = len(h)
    hexarray_len = len(hexarray)

    if hexarray_len%2 != 0:
        raise ValueError("Binary message must be an even number of bytes but %d were found." % hexarray_len)

    #Slice by the real header length so a caller-supplied header of a
    #different size is compared correctly, and report only the bytes that
    #were found where the header should be rather than the whole image
    found_header = hexarray[:header_len]
    if found_header != h:
        raise ValueError("Binary message must begin with header: %s but found: %s" % (str(h),str(found_header)))

    message = hexarray[header_len:-2]
    return checksum(message) == hexarray[-2:]

def read_hex_file(filename):
    """Read *filename* in binary mode and return its bytes as a list of ints."""
    with open(filename, mode='rb') as hexfile:
        raw = hexfile.read()
    return list(raw)

def write_asm_file(filename, contents):
    """Write each item of *contents* to *filename* on its own line.

    Always returns True once the file has been written.
    """
    with open(filename, 'w') as asmfile:
        for entry in contents:
            asmfile.write("%s\n" % entry)
    return True

def disassemble(hexarray, h=header, print_output=True, outfile=None):
    """Decode a binary image into listing lines, printing and/or saving them.

    Param: hexarray is the list of byte values making up the whole image
    Param: h is the header the image is expected to start with
    Param: print_output prints each listing line when true
    Param: outfile is the filename to write the listing to, or None

    Returns whatever write_asm_file() returns when outfile is given,
    otherwise None.
    Raises Exception when is_valid() reports a checksum mismatch.
    """
    #Validate against the same header that is used for slicing below
    if not is_valid(hexarray, h):
        raise Exception("Data has an invalid checksum")

    output_buffer = []
    #Drop the header, the two bytes that follow it and the 2-byte checksum.
    #NOTE(review): the two bytes after the header are presumably a length
    #field -- confirm against the assembler's output format
    message = hexarray[len(h)+2:-2]

    #Both zip arguments are the same iterator, so bytes are consumed in
    #(low, high) pairs. enumerate supplies a line number that advances with
    #every instruction; it used to be reset to 0 on each pass of the loop
    byte_iter = iter(message)
    for line_number, (low, high) in enumerate(zip(byte_iter, byte_iter)):
        byte_l = format(low,"08b")
        byte_h = format(high,"08b")
        word_l = byte_l[4:]
        word_m = byte_l[:4]
        word_h = byte_h[4:]     #The upper four bits of the high byte are not used

        #A zero high word means the middle word selects the instruction
        if word_h == "0000":
            source = excodes[word_m](word_h,word_m,word_l)
        else:
            source = opcodes[word_h](word_h,word_m,word_l)

        this_line = format_output_line(line_number, word_h, word_m, word_l, source)
        if print_output:
            print(this_line)
        if outfile is not None:
            output_buffer.append(this_line)

    if outfile is not None:
        return write_asm_file(outfile,output_buffer)


def format_output_line(ln, word_h, word_m, word_l, source):
    """Build one listing line from a line number, three words and source text.

    The module-level show_* flags choose which columns appear; show_verbose
    turns every column on. Trailing whitespace is stripped from the result.
    """
    columns = []
    if show_verbose or show_linenums:
        columns.append("%s\t" % format(ln," 5d"))

    if show_verbose or show_words:
        if show_verbose or show_wordspace:
            word_template = "%s %s %s\t"
        else:
            word_template = "%s%s%s\t"
        columns.append(word_template % (word_h, word_m, word_l))

    if show_verbose or show_source:
        columns.append("%s" % source)

    return "".join(columns).rstrip()

def args_rxry(instruction, oper_x, oper_y):
    """Render "<instruction> RX,RY" from two binary-string register fields."""
    reg_x = named_registers[int(oper_x,2)]
    reg_y = named_registers[int(oper_y,2)]
    return "%s %s,%s" % (instruction, reg_x, reg_y)

def args_ry(instruction, oper_y):
    """Render "<instruction> RY" from one binary-string register field."""
    register = named_registers[int(oper_y,2)]
    return "%s %s" % (instruction, register)

def args_r0n(instruction, oper_y):
    """Render "<instruction> R0,0b<oper_y>", showing the operand as a binary literal."""
    operands = (instruction, oper_y)
    return "%s R0,0b%s" % operands

def args_rgm(instruction, oper_y):
    """Render "<instruction> <reg>,0b<bits>" from a 4-character binary field.

    The first two characters select the register by index; the remaining
    characters are emitted unchanged as a binary literal.
    """
    reg_bits, literal_bits = oper_y[:2], oper_y[2:]
    return "%s %s,0b%s" % (instruction, named_registers[int(reg_bits,2)], literal_bits)

def op_add_rxry(word_h, word_m, word_l):
    """Decode ADD RX,RY (RX from the middle word, RY from the low word)."""
    return args_rxry(instruction="ADD", oper_x=word_m, oper_y=word_l)

def op_adc(word_h, word_m, word_l):
    """Decode ADC RX,RY."""
    return args_rxry(instruction="ADC", oper_x=word_m, oper_y=word_l)

def op_sub(word_h, word_m, word_l):
    """Decode SUB RX,RY."""
    return args_rxry(instruction="SUB", oper_x=word_m, oper_y=word_l)

def op_sbb(word_h, word_m, word_l):
    """Decode SBB RX,RY."""
    return args_rxry(instruction="SBB", oper_x=word_m, oper_y=word_l)

def op_or_rxry(word_h, word_m, word_l):
    """Decode OR RX,RY."""
    return args_rxry(instruction="OR", oper_x=word_m, oper_y=word_l)

def op_and_rxry(word_h, word_m, word_l):
    """Decode AND RX,RY."""
    return args_rxry(instruction="AND", oper_x=word_m, oper_y=word_l)

def op_xor_rxry(word_h, word_m, word_l):
    """Decode XOR RX,RY."""
    return args_rxry(instruction="XOR", oper_x=word_m, oper_y=word_l)

def op_mov_rxry(word_h, word_m, word_l):
    """Decode MOV RX,RY."""
    return args_rxry(instruction="MOV", oper_x=word_m, oper_y=word_l)

def op_mov_rxn(word_h, word_m, word_l):
    """Decode MOV RX,N (register from the middle word, low word as a binary literal)."""
    target = named_registers[int(word_m,2)]
    return "MOV %s,0b%s" % (target, word_l)

def op_mov_xyr0(word_h, word_m, word_l):
    """Decode MOV [RX:RY],R0 (both registers looked up by index)."""
    reg_x = named_registers[int(word_m,2)]
    reg_y = named_registers[int(word_l,2)]
    return "MOV [%s:%s],R0" % (reg_x, reg_y)

def op_mov_r0xy(word_h, word_m, word_l):
    """Decode MOV R0,[RX:RY] (both registers looked up by index)."""
    reg_x = named_registers[int(word_m,2)]
    reg_y = named_registers[int(word_l,2)]
    return "MOV R0,[%s:%s]" % (reg_x, reg_y)

def op_mov_nnr0(word_h, word_m, word_l):
    """Decode MOV [N:N],R0 (middle and low words shown as binary literals)."""
    address = (word_m, word_l)
    return "MOV [0b%s:0b%s],R0" % address

def op_mov_r0nn(word_h, word_m, word_l):
    """Decode MOV R0,[N:N] (middle and low words shown as binary literals)."""
    address = (word_m, word_l)
    return "MOV R0,[0b%s:0b%s]" % address

def op_mov_pcnn(word_h, word_m, word_l):
    """Decode MOV PC,[N:N] (middle and low words shown as binary literals)."""
    address = (word_m, word_l)
    return "MOV PC,[0b%s:0b%s]" % address

def op_jr(word_h, word_m, word_l):
    """Decode JR [N:N] (middle and low words shown as binary literals)."""
    offset = (word_m, word_l)
    return "JR [0b%s:0b%s]" % offset

def op_cp(word_h, word_m, word_l):
    """Decode CP R0,N (low word shown as a binary literal)."""
    return args_r0n(instruction="CP", oper_y=word_l)

def op_add_r0n(word_h, word_m, word_l):
    """Decode ADD R0,N."""
    return args_r0n(instruction="ADD", oper_y=word_l)

def op_inc(word_h, word_m, word_l):
    """Decode INC RY (register from the low word)."""
    return args_ry(instruction="INC", oper_y=word_l)

def op_dec(word_h, word_m, word_l):
    """Decode DEC RY."""
    return args_ry(instruction="DEC", oper_y=word_l)

def op_dsz(word_h, word_m, word_l):
    """Decode DSZ RY."""
    return args_ry(instruction="DSZ", oper_y=word_l)

def op_or_r0n(word_h, word_m, word_l):
    """Decode OR R0,N."""
    return args_r0n(instruction="OR", oper_y=word_l)

def op_and_r0n(word_h, word_m, word_l):
    """Decode AND R0,N."""
    return args_r0n(instruction="AND", oper_y=word_l)

def op_xor_r0n(word_h, word_m, word_l):
    """Decode XOR R0,N."""
    return args_r0n(instruction="XOR", oper_y=word_l)
def op_exr(word_h,word_m,word_l):
    """Decode EXR N, showing the low word as a binary literal.

    The operand is emitted with a "0b" prefix like every other immediate
    value this disassembler prints. Without it the digits (e.g. "0101")
    would presumably be read back as a decimal number by the assembler.
    """
    return "EXR 0b%s" % word_l
def op_bit(word_h, word_m, word_l):
    """Decode BIT <reg>,M via args_rgm (register and literal both come from the low word)."""
    return args_rgm(instruction="BIT", oper_y=word_l)

def op_bset(word_h, word_m, word_l):
    """Decode BSET <reg>,M."""
    return args_rgm(instruction="BSET", oper_y=word_l)

def op_bclr(word_h, word_m, word_l):
    """Decode BCLR <reg>,M."""
    return args_rgm(instruction="BCLR", oper_y=word_l)

def op_btg(word_h, word_m, word_l):
    """Decode BTG <reg>,M."""
    return args_rgm(instruction="BTG", oper_y=word_l)

def op_rrc(word_h, word_m, word_l):
    """Decode RRC RY (register from the low word)."""
    return args_ry(instruction="RRC", oper_y=word_l)

def op_ret(word_h, word_m, word_l):
    """Decode RET R0,N (low word shown as a binary literal)."""
    return args_r0n(instruction="RET", oper_y=word_l)

def op_skip(word_h, word_m, word_l):
    """Decode SKIP <flag>,M.

    The first two characters of the low word index special_registers to get
    the flag name; the remaining characters are emitted as a binary literal.
    """
    flag_bits, literal_bits = word_l[:2], word_l[2:]
    return "SKIP %s,0b%s" % (special_registers[int(flag_bits,2)], literal_bits)

#Decoders for instructions whose high word is non-zero, keyed by that high
#word as a 4-character binary string. disassemble() calls the selected
#function with (word_h, word_m, word_l)
opcodes = {
    "0001": op_add_rxry,
    "0010": op_adc,
    "0011": op_sub,
    "0100": op_sbb,
    "0101": op_or_rxry,
    "0110": op_and_rxry,
    "0111": op_xor_rxry,
    "1000": op_mov_rxry,
    "1001": op_mov_rxn,
    "1010": op_mov_xyr0,
    "1011": op_mov_r0xy,
    "1100": op_mov_nnr0,
    "1101": op_mov_r0nn,
    "1110": op_mov_pcnn,
    "1111": op_jr,
    }

#Decoders for instructions whose high word is "0000". In that case
#disassemble() uses the middle word as the key into this table and calls the
#selected function with (word_h, word_m, word_l)
excodes = {
    "0000": op_cp,
    "0001": op_add_r0n,
    "0010": op_inc,
    "0011": op_dec,
    "0100": op_dsz,
    "0101": op_or_r0n,
    "0110": op_and_r0n,
    "0111": op_xor_r0n,
    "1000": op_exr,
    "1001": op_bit,
    "1010": op_bset,
    "1011": op_bclr,
    "1100": op_btg,
    "1101": op_rrc,
    "1110": op_ret,
    "1111": op_skip,
    }

#Register names ordered so that the integer value of an operand field is the
#index of the register it names (the args_* and op_* helpers look names up
#with named_registers[int(bits,2)])
named_registers = (
    "R0",
    "R1",
    "R2",
    "R3",
    "R4",
    "R5",
    "R6",
    "R7",
    "R8",
    "R9",
    "OUT",
    "IN",
    "JSR",
    "PCL",
    "PCM",
    "PCH",
    )

special_registers = (
    #Important: Don't change the index positions of C,NC,Z,NZ: op_skip uses
    #the first two bits of a SKIP instruction's low word as an index into
    #this tuple
    "C",
    "NC",
    "Z",
    "NZ",
    "PC",
)

def main():
    """Command-line entry point for the disassembler.

    Parses the command-line options, updates the module-level output flags
    to match, derives the ".s" output filename from the input filename and
    runs disassemble() on the bytes read from the hex file. A success
    message is printed when disassemble() returns True.
    """
    import os   #Local import: only needed here to split off the extension

    print("Four-Bit-Badge Disassembler version %s\n" % __version__)

    parser = argparse.ArgumentParser()
    parser.add_argument("hexfile", help=".hex file for disassembly")
    parser.add_argument("-q", help="Write to file without showing any human-readable output", action="store_true")
    parser.add_argument("-c", help="enable sourcecode readout", action="store_true")
    parser.add_argument("-n", help="enable line numbers", action="store_true")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-s", help="Show 12-bit instructions with spaces between words", action="store_true")
    group.add_argument("-w", help="Show 12-bit instructions without spaces between words", action="store_true")
    args = parser.parse_args()

    global show_output
    global show_verbose
    global show_linenums
    global show_words
    global show_wordspace
    global show_source

    #Any explicit formatting option switches verbose mode off so that only
    #the requested columns are shown
    if args.q:
        show_output = False
    if args.c:
        show_verbose = False
        show_source = True
    if args.n:
        show_verbose = False
        show_linenums = True
    if args.s:
        show_verbose = False
        show_words = True
    if args.w:
        show_verbose = False
        show_words = True
        show_wordspace = False

    #Replace the extension (if any) of the final path component with ".s".
    #splitext only inspects the last component, so a dot in a directory name
    #(e.g. "../build/prog") is no longer mistaken for the start of an extension
    outfile = os.path.splitext(args.hexfile)[0] + ".s"

    status = disassemble(read_hex_file(args.hexfile), print_output=show_output, outfile=outfile)
    if status == True:
        print("\nSuccessfully wrote asm file: %s\n" % outfile)

#Run the disassembler only when this file is executed as a script, not on import
if __name__ == "__main__":
    main()
	#!/usr/bin/env python3
	import argparse
	from fbb_as import checksum

	__version__ = "0.6dev"
	#Byte sequence every binary image must begin with; is_valid() compares the
	#start of the image against it
	header = [0x00, 0xFF, 0x00, 0xFF, 0xA5, 0xC3]

	#Output options: module-level flags that main() adjusts from the command line
	#show_output is passed to disassemble() as print_output
	show_output = True
	show_verbose = True #Prints all optional things (spaces, comments, empty lines)
	#The remaining flags pick individual listing columns in format_output_line()
	show_words = False
	show_wordspace = True
	show_linenums = False
	show_source = False

	def is_valid(hexarray, h=header):
	    """Check that a binary image starts with the header and ends with a matching checksum.

	    Param: hexarray is a list of byte values. There must be an even number
	        of elements, each pair arranged with low byte first, high second.
	        It must begin with the header values and end with a 16-bit checksum
	        stored as a low-byte, high-byte pair.
	    Param: h is the list of header bytes the image must start with.

	    Returns True when the last two bytes equal the value checksum() gives
	    for everything between the header and those two bytes, else False.

	    Raises ValueError (a subclass of Exception, so existing handlers still
	    catch it) when the length is odd or the header does not match.
	    """
	    header_len = len(h)
	    hexarray_len = len(hexarray)

	    if hexarray_len%2 != 0:
	        raise ValueError("Binary message must be an even number of bytes but %d were found." % hexarray_len)

	    #Slice by the real header length so a caller-supplied header of a
	    #different size is compared correctly, and report only the bytes that
	    #were found where the header should be rather than the whole image
	    found_header = hexarray[:header_len]
	    if found_header != h:
	        raise ValueError("Binary message must begin with header: %s but found: %s" % (str(h),str(found_header)))

	    message = hexarray[header_len:-2]
	    return checksum(message) == hexarray[-2:]

	def read_hex_file(filename):
	    """Read *filename* in binary mode and return its bytes as a list of ints."""
	    #Indentation restored: the body lines had been flattened to the def's level
	    with open(filename, mode='rb') as file:
	        stream = file.read()
	    return list(stream)

	def write_asm_file(filename, contents):
	    """Write each item of *contents* to *filename* on its own line; return True."""
	    #Indentation restored: the body lines had been flattened to the def's level
	    with open(filename, 'w') as file:
	        file.writelines("%s\n" % i for i in contents)
	    return True

	def disassemble(hexarray, h=header, print_output=True, outfile=None):
	    """Decode a binary image into listing lines, printing and/or saving them.

	    Param: hexarray is the list of byte values making up the whole image
	    Param: h is the header the image is expected to start with
	    Param: print_output prints each listing line when true
	    Param: outfile is the filename to write the listing to, or None

	    Returns whatever write_asm_file() returns when outfile is given,
	    otherwise None.
	    Raises Exception when is_valid() reports a checksum mismatch.
	    """
	    #Validate against the same header that is used for slicing below
	    if not is_valid(hexarray, h):
	        raise Exception("Data has an invalid checksum")

	    output_buffer = []
	    #Drop the header, the two bytes that follow it and the 2-byte checksum.
	    #NOTE(review): the two bytes after the header are presumably a length
	    #field -- confirm against the assembler's output format
	    message = hexarray[len(h)+2:-2]

	    #Both zip arguments are the same iterator, so bytes are consumed in
	    #(low, high) pairs (the pasted zip call had lost its "*" characters).
	    #enumerate supplies a line number that advances with every instruction;
	    #it used to be reset to 0 on each pass of the loop
	    byte_iter = iter(message)
	    for line_number, (low, high) in enumerate(zip(byte_iter, byte_iter)):
	        byte_l = format(low,"08b")
	        byte_h = format(high,"08b")
	        word_l = byte_l[4:]
	        word_m = byte_l[:4]
	        word_h = byte_h[4:]     #The upper four bits of the high byte are not used

	        #A zero high word means the middle word selects the instruction
	        if word_h == "0000":
	            source = excodes[word_m](word_h,word_m,word_l)
	        else:
	            source = opcodes[word_h](word_h,word_m,word_l)

	        this_line = format_output_line(line_number, word_h, word_m, word_l, source)
	        if print_output:
	            print(this_line)
	        if outfile is not None:
	            output_buffer.append(this_line)

	    if outfile is not None:
	        return write_asm_file(outfile,output_buffer)


	def format_output_line(ln, word_h, word_m, word_l, source):
	    """Build one listing line from a line number, three words and source text.

	    The module-level show_* flags choose which columns appear; show_verbose
	    turns every column on. Trailing whitespace is stripped from the result.
	    """
	    #Indentation restored: the nested if/else structure had been flattened
	    outstring = ""
	    if show_linenums or show_verbose:
	        outstring += "%s\t" % format(ln," 5d")

	    if show_words or show_verbose:
	        if show_wordspace or show_verbose:
	            outstring += "%s %s %s\t" % (word_h, word_m, word_l)
	        else:
	            outstring += "%s%s%s\t" % (word_h, word_m, word_l)

	    if show_source or show_verbose:
	        outstring += "%s" % source

	    return outstring.rstrip()

	def args_rxry(instruction, oper_x, oper_y):
	    """Render "<instruction> RX,RY" from two binary-string register fields."""
	    #Indentation restored: the return had been flattened to the def's level
	    return "%s %s,%s" % (instruction, named_registers[int(oper_x,2)], named_registers[int(oper_y,2)])

	def args_ry(instruction, oper_y):
	    """Render "<instruction> RY" from one binary-string register field."""
	    #Indentation restored: the return had been flattened to the def's level
	    return "%s %s" % (instruction, named_registers[int(oper_y,2)])

	def args_r0n(instruction, oper_y):
	    """Render "<instruction> R0,0b<oper_y>", showing the operand as a binary literal."""
	    #Indentation restored: the return had been flattened to the def's level
	    return "%s R0,0b%s" % (instruction, oper_y)

	def args_rgm(instruction, oper_y):
	    """Render "<instruction> <reg>,0b<bits>" from a 4-character binary field.

	    The first two characters select the register by index; the remaining
	    characters are emitted unchanged as a binary literal.
	    """
	    #Indentation restored: the body had been flattened to the def's level
	    reg = named_registers[int(oper_y[:2],2)]
	    return "%s %s,0b%s" % (instruction, reg, oper_y[2:])

	#Instruction decoders. Each takes the high, middle and low words as
	#4-character binary strings and returns the assembly text. Indentation has
	#been restored: every return had been flattened to the level of its def.
	def op_add_rxry(word_h,word_m,word_l):
	    """Decode ADD RX,RY (RX from the middle word, RY from the low word)."""
	    return args_rxry("ADD",word_m,word_l)

	def op_adc(word_h,word_m,word_l):
	    """Decode ADC RX,RY."""
	    return args_rxry("ADC",word_m,word_l)

	def op_sub(word_h,word_m,word_l):
	    """Decode SUB RX,RY."""
	    return args_rxry("SUB",word_m,word_l)

	def op_sbb(word_h,word_m,word_l):
	    """Decode SBB RX,RY."""
	    return args_rxry("SBB",word_m,word_l)

	def op_or_rxry(word_h,word_m,word_l):
	    """Decode OR RX,RY."""
	    return args_rxry("OR",word_m,word_l)

	def op_and_rxry(word_h,word_m,word_l):
	    """Decode AND RX,RY."""
	    return args_rxry("AND",word_m,word_l)

	def op_xor_rxry(word_h,word_m,word_l):
	    """Decode XOR RX,RY."""
	    return args_rxry("XOR",word_m,word_l)

	def op_mov_rxry(word_h,word_m,word_l):
	    """Decode MOV RX,RY."""
	    return args_rxry("MOV",word_m,word_l)

	def op_mov_rxn(word_h,word_m,word_l):
	    """Decode MOV RX,N (register from the middle word, low word as a binary literal)."""
	    return "MOV %s,0b%s" % (named_registers[int(word_m,2)], word_l)

	def op_mov_xyr0(word_h,word_m,word_l):
	    """Decode MOV [RX:RY],R0 (both registers looked up by index)."""
	    return "MOV [%s:%s],R0" % (named_registers[int(word_m,2)], named_registers[int(word_l,2)])

	def op_mov_r0xy(word_h,word_m,word_l):
	    """Decode MOV R0,[RX:RY] (both registers looked up by index)."""
	    return "MOV R0,[%s:%s]" % (named_registers[int(word_m,2)], named_registers[int(word_l,2)])

	def op_mov_nnr0(word_h,word_m,word_l):
	    """Decode MOV [N:N],R0 (middle and low words shown as binary literals)."""
	    return "MOV [0b%s:0b%s],R0" % (word_m, word_l)

	def op_mov_r0nn(word_h,word_m,word_l):
	    """Decode MOV R0,[N:N] (middle and low words shown as binary literals)."""
	    return "MOV R0,[0b%s:0b%s]" % (word_m, word_l)

	def op_mov_pcnn(word_h,word_m,word_l):
	    """Decode MOV PC,[N:N] (middle and low words shown as binary literals)."""
	    return "MOV PC,[0b%s:0b%s]" % (word_m, word_l)

	def op_jr(word_h,word_m,word_l):
	    """Decode JR [N:N] (middle and low words shown as binary literals)."""
	    return "JR [0b%s:0b%s]" % (word_m, word_l)

	def op_cp(word_h,word_m,word_l):
	    """Decode CP R0,N (low word shown as a binary literal)."""
	    return args_r0n("CP", word_l)

	def op_add_r0n(word_h,word_m,word_l):
	    """Decode ADD R0,N."""
	    return args_r0n("ADD", word_l)

	def op_inc(word_h,word_m,word_l):
	    """Decode INC RY (register from the low word)."""
	    return args_ry("INC", word_l)

	def op_dec(word_h,word_m,word_l):
	    """Decode DEC RY."""
	    return args_ry("DEC", word_l)

	def op_dsz(word_h,word_m,word_l):
	    """Decode DSZ RY."""
	    return args_ry("DSZ", word_l)

	def op_or_r0n(word_h,word_m,word_l):
	    """Decode OR R0,N."""
	    return args_r0n("OR", word_l)

	def op_and_r0n(word_h,word_m,word_l):
	    """Decode AND R0,N."""
	    return args_r0n("AND", word_l)

	def op_xor_r0n(word_h,word_m,word_l):
	    """Decode XOR R0,N."""
	    return args_r0n("XOR", word_l)

	def op_exr(word_h,word_m,word_l):
	    """Decode EXR N, showing the low word as a binary literal.

	    The "0b" prefix matches every other immediate value printed here;
	    without it the digits would presumably be read back as decimal.
	    """
	    return "EXR 0b%s" % word_l

	def op_bit(word_h,word_m,word_l):
	    """Decode BIT <reg>,M (register and literal both come from the low word)."""
	    return args_rgm("BIT", word_l)

	def op_bset(word_h,word_m,word_l):
	    """Decode BSET <reg>,M."""
	    return args_rgm("BSET", word_l)

	def op_bclr(word_h,word_m,word_l):
	    """Decode BCLR <reg>,M."""
	    return args_rgm("BCLR", word_l)

	def op_btg(word_h,word_m,word_l):
	    """Decode BTG <reg>,M."""
	    return args_rgm("BTG", word_l)

	def op_rrc(word_h,word_m,word_l):
	    """Decode RRC RY (register from the low word)."""
	    return args_ry("RRC", word_l)

	def op_ret(word_h,word_m,word_l):
	    """Decode RET R0,N (low word shown as a binary literal)."""
	    return args_r0n("RET", word_l)

	def op_skip(word_h,word_m,word_l):
	    """Decode SKIP <flag>,M.

	    The first two characters of the low word index special_registers to
	    get the flag name; the rest is emitted as a binary literal.
	    """
	    flag = special_registers[int(word_l[:2],2)]
	    return "SKIP %s,0b%s" % (flag, word_l[2:])

	#Decoders for instructions whose high word is non-zero, keyed by that high
	#word as a 4-character binary string. disassemble() calls the selected
	#function with (word_h, word_m, word_l)
	opcodes = {
	"0001": op_add_rxry,
	"0010": op_adc,
	"0011": op_sub,
	"0100": op_sbb,
	"0101": op_or_rxry,
	"0110": op_and_rxry,
	"0111": op_xor_rxry,
	"1000": op_mov_rxry,
	"1001": op_mov_rxn,
	"1010": op_mov_xyr0,
	"1011": op_mov_r0xy,
	"1100": op_mov_nnr0,
	"1101": op_mov_r0nn,
	"1110": op_mov_pcnn,
	"1111": op_jr,
	}

	#Decoders for instructions whose high word is "0000". In that case
	#disassemble() uses the middle word as the key into this table and calls
	#the selected function with (word_h, word_m, word_l)
	excodes = {
	"0000": op_cp,
	"0001": op_add_r0n,
	"0010": op_inc,
	"0011": op_dec,
	"0100": op_dsz,
	"0101": op_or_r0n,
	"0110": op_and_r0n,
	"0111": op_xor_r0n,
	"1000": op_exr,
	"1001": op_bit,
	"1010": op_bset,
	"1011": op_bclr,
	"1100": op_btg,
	"1101": op_rrc,
	"1110": op_ret,
	"1111": op_skip,
	}

	#Register names ordered so that the integer value of an operand field is
	#the index of the register it names (the args_* and op_* helpers look
	#names up with named_registers[int(bits,2)])
	named_registers = (
	"R0",
	"R1",
	"R2",
	"R3",
	"R4",
	"R5",
	"R6",
	"R7",
	"R8",
	"R9",
	"OUT",
	"IN",
	"JSR",
	"PCL",
	"PCM",
	"PCH",
	)

	special_registers = (
	#Important: Don't change the index positions of C,NC,Z,NZ: op_skip uses
	#the first two bits of a SKIP instruction's low word as an index into
	#this tuple
	"C",
	"NC",
	"Z",
	"NZ",
	"PC",
	)

	def main():
	    """Command-line entry point for the disassembler.

	    Parses the command-line options, updates the module-level output flags
	    to match, derives the ".s" output filename from the input filename and
	    runs disassemble() on the bytes read from the hex file. A success
	    message is printed when disassemble() returns True.
	    """
	    import os   #Local import: only needed here to split off the extension

	    print("Four-Bit-Badge Disassembler version %s\n" % __version__)

	    parser = argparse.ArgumentParser()
	    parser.add_argument("hexfile", help=".hex file for disassembly")
	    parser.add_argument("-q", help="Write to file without showing any human-readable output", action="store_true")
	    parser.add_argument("-c", help="enable sourcecode readout", action="store_true")
	    parser.add_argument("-n", help="enable line numbers", action="store_true")
	    group = parser.add_mutually_exclusive_group()
	    group.add_argument("-s", help="Show 12-bit instructions with spaces between words", action="store_true")
	    group.add_argument("-w", help="Show 12-bit instructions without spaces between words", action="store_true")
	    args = parser.parse_args()

	    global show_output
	    global show_verbose
	    global show_linenums
	    global show_words
	    global show_wordspace
	    global show_source

	    #Any explicit formatting option switches verbose mode off so that only
	    #the requested columns are shown
	    if args.q:
	        show_output = False
	    if args.c:
	        show_verbose = False
	        show_source = True
	    if args.n:
	        show_verbose = False
	        show_linenums = True
	    if args.s:
	        show_verbose = False
	        show_words = True
	    if args.w:
	        show_verbose = False
	        show_words = True
	        show_wordspace = False

	    #Replace the extension (if any) of the final path component with ".s".
	    #splitext only inspects the last component, so a dot in a directory
	    #name is not mistaken for the start of an extension
	    outfile = os.path.splitext(args.hexfile)[0] + ".s"

	    status = disassemble(read_hex_file(args.hexfile), print_output=show_output, outfile=outfile)
	    if status == True:
	        print("\nSuccessfully wrote asm file: %s\n" % outfile)

	#Run the disassembler only when executed as a script; the call is indented
	#again so that it forms the body of the if statement
	if __name__ == "__main__":
	    main()