-
-
Save szczys/b9a19714ea27d50be01d1a8479f97795 to your computer and use it in GitHub Desktop.
4-Bit Badge Assembler still in development
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import argparse | |
| import string | |
| __version__ = "0.6dev" | |
| comment_delimiter = ";" #Must be a single character or tokenizer will break | |
| accepted_chars = string.ascii_letters+string.digits+"._" | |
| special_delimiters = [',',':','[',']','+','-'] #Characters that me up the ASM language | |
| modifying_keywords = ["LOW","MID","HIGH"] | |
| modifying_operators = ["+","-"] | |
| #Output options | |
| show_output = True | |
| show_verbose = True #Prints all optional things (spaces, comments, empty lines) | |
| show_wordspace = False | |
| show_linenums = False | |
| show_comments = False | |
| header = [0x00, 0xFF, 0x00, 0xFF, 0xA5, 0xC3] | |
| named_registers = ( | |
| "R0", | |
| "R1", | |
| "R2", | |
| "R3", | |
| "R4", | |
| "R5", | |
| "R6", | |
| "R7", | |
| "R8", | |
| "R9", | |
| "OUT", | |
| "IN", | |
| "JSR", | |
| "PCL", | |
| "PCM", | |
| "PCH", | |
| ) | |
| special_registers = ( | |
| #Important: Don't change the index positons of C,NC,Z,NZ as they're | |
| #being used in generating machine code | |
| "C", | |
| "NC", | |
| "Z", | |
| "NZ", | |
| "PC", | |
| ) | |
| symbols = dict() | |
def parse_asm(lines_of_asm,hexfile_out=None):
    '''
    Assemble a complete program given as one string of ASM source.

    The first pass (get_tokenized_code) tokenizes every line and builds the
    symbol table, which is stored in the module-level ``symbols``. The second
    pass resolves the operands of each line, calls the matching opcode
    handler from ``instructions`` and prints the result according to the
    module-level show_* options.

    lines_of_asm: full source text, lines separated by newlines
    hexfile_out:  when not None, path of the binary file to write

    Returns True on success. Returns None after an error has been reported
    via print_error(); in that case no file is written.
    '''
    global symbols

    machine_lines = 0
    word_array = []

    #First pass: Tokenize the input
    try:
        code_list, symbols = get_tokenized_code(lines_of_asm)
    except ParserError:
        #Error message will have already printed so bail
        return

    #Second pass: Generate output
    for i, c in enumerate(code_list):
        tokens = c.tokens

        #Blank/comment-only lines and symbol definitions emit no machine code;
        #they are only echoed in verbose mode
        if tokens is None or tokens[0] in symbols:
            if show_verbose and show_output:
                print_output(c.source, None, None)
            continue

        try:
            #Substitution pass for variables in this set of tokens. This is
            #inside the try because resolve() reports bad operands by raising
            #ParserError, which must be printed rather than escape as a traceback
            working_tokens = []
            for t in tokens:
                if isinstance(t, SmartToken):
                    working_tokens.append(t.resolve(symbols))
                elif t in special_delimiters:
                    continue
                else:
                    working_tokens.append(t.upper())

            opcode = working_tokens[0]
            handler = instructions.get(opcode)
            if handler is None:
                raise ParserError("E::Unknown opcode: %s" % opcode)
            if opcode == "ORG":
                #ORG needs the current address to know how many words to pad
                binary = handler(working_tokens, machine_lines)
            else:
                binary = handler(working_tokens)
        except ParserError as e:
            print_error(e, i, c.source)
            return

        if binary is None:
            print_error(ParserError("E::Uncaught syntax error"), i, c.source)
            return

        #Special directives GOSUB, GOTO, and ORG will return multiple lines
        for bin_line in binary.split('\n'):
            ln = machine_lines if (show_linenums or show_verbose) else None
            cm = c.comment if (show_comments or show_verbose) else None
            if show_output:
                print_output(bin_line,ln,cm)
            machine_lines += 1
            if hexfile_out is not None:
                word_array.append(bin_line.replace(" ",""))

    if hexfile_out is not None:
        with open(hexfile_out, "wb") as f:
            f.write(bytes(generate_hex(word_array)))
    return True
def get_tokenized_code(lines_of_asm):
    '''
    First assembler pass: tokenize every source line and build the symbol table.

    Each line is parsed with parse_line(). A line whose first token is not a
    known instruction defines a symbol: a label takes the current program
    address, an ``EQU`` definition takes the resolved value of its operand.
    Instructions advance the program address (GOTO/GOSUB by two words, ORG to
    its argument, everything else by one).

    Returns (code_array, symbols): the list of CodePack objects, one per
    source line, and a dict mapping upper-cased symbol names to values.

    Raises an empty ParserError after printing the problem with print_error(),
    so callers can simply stop without printing anything further.
    '''
    code_array = []
    symbols = dict()
    reg_addr = 0

    def fail(message, index, source):
        #Report the problem, then abort the pass with a message-less error
        print_error(message, index, source)
        raise ParserError()

    for i, raw_line in enumerate(lines_of_asm.split('\n')):
        try:
            code_obj = parse_line(raw_line)
        except ParserError as e:
            fail(e, i, raw_line)
        code_array.append(code_obj)

        tokens = code_obj.tokens
        if tokens is None:
            continue

        #Validate the opcodes/labels during first pass
        token = tokens[0].upper()
        if token not in instructions:
            #This must be a symbol or a variable definition
            if token in symbols:
                fail("E::Cannot define a token that was previously defined", i, code_obj.source)
            #Prewind the register number for use if this turns out to be a label
            s_value = reg_addr
            if len(tokens) > 1:
                marker = tokens[1]
                if not isinstance(marker, str):
                    #An operand directly after an unknown word is neither a
                    #label (NAME:) nor a definition (NAME EQU value)
                    fail("E::Unknown opcode or malformed label: %s" % token, i, code_obj.source)
                if marker.upper() == "EQU":
                    #This is a variable definition, use its value for the symbol table
                    if len(tokens) != 3 or not isinstance(tokens[2], SmartToken):
                        fail("E::Wrong number of items in EQU statement", i, code_obj.source)
                    try:
                        s_value = tokens[2].resolve(symbols)
                    except ParserError as e:
                        fail(e, i, code_obj.source)
            #Write correct value to symbols table
            symbols[token] = s_value
        elif token == "GOSUB" or token == "GOTO":
            #These directives will add two lines of code instead of one so adjust here
            reg_addr += 2
        elif token == "ORG":
            operand = tokens[1] if len(tokens) > 1 else None
            if not isinstance(operand, SmartToken):
                fail("E::This opcode requires a number [0..4095] as argument but got %s" % operand, i, code_obj.source)
            try:
                #Resolved against an empty symbol table: ORG takes a literal number
                new_linenum = get_dec_or_token(operand.resolve([]))
            except ParserError as e:
                fail(e, i, code_obj.source)
            if not isinstance(new_linenum, int) or not 0 <= new_linenum < 4096:
                fail("E::This opcode requires a number [0..4095] as argument but got %s" % new_linenum, i, code_obj.source)
            elif new_linenum <= reg_addr:
                fail("E::The ORG opcode requires the argument (%d) be greater than the current program memory register number (%d)" % (new_linenum, reg_addr), i, code_obj.source)
            else:
                reg_addr = new_linenum
        else:
            #All other instructions increment the address by one
            reg_addr += 1
    return code_array,symbols
def tokenize(instring, delimiters=special_delimiters+[comment_delimiter]):
    '''
    Split a line of ASM code into tokens.

    The line is first split on whitespace, then every piece is split again on
    each delimiter in turn. Delimiters are kept as tokens of their own;
    whitespace and empty fragments are dropped.
    '''
    pieces = instring.split()
    for delim in delimiters:
        expanded = []
        for piece in pieces:
            fragments = piece.split(delim)
            last = len(fragments) - 1
            for pos, fragment in enumerate(fragments):
                #A fragment is empty when the delimiter begins or ends the
                #piece that was split, so don't add empty elements
                if fragment:
                    expanded.append(fragment)
                #Put the delimiter back between two neighbouring fragments
                if pos < last:
                    expanded.append(delim)
        pieces = expanded
    return pieces
def parse_line(instring):
    '''
    Performs the work of tokenizing a single line of code,
    ensuring that the syntax is valid (although this does
    not mean the combo of instructions and operands is valid.)

    The raw tokens are walked through a small state machine
    (TokenizerStates). The first token is stored as a plain string; operands
    are collected into SmartToken streams so that symbols and arithmetic can
    be resolved in the second pass.

    Returns CodePack() object. Its ``tokens`` stay None for blank or
    comment-only lines and ``comment`` holds everything from the comment
    delimiter onward.

    Raises ParserError describing the first syntax problem found.
    '''
    raw_tokens = tokenize(instring)
    parsed_tokens = list()
    code = CodePack(source=instring)
    s = TokenizerStates()

    #Preserve full comment
    if comment_delimiter in instring:
        d_idx = instring.index(comment_delimiter)
        code.comment = instring[d_idx:]

    #Custom type to contain Token streams. Will be used for symbol substitution and math in second pass
    token_stream = SmartToken()

    #States in which operand tokens are being collected
    operand_states = [s.WATCH_TOKEN_SET,
                      s.TOKEN_COLON_BRACKET,
                      s.TOKEN_BRACKET,
                      s.TOKEN_COMMA_COMMENT,
                      s.TOKEN_COMMENT,
                      s.TOKEN_VAR_DEF]

    #Walk through the flowchart
    for e in raw_tokens:
        e = e.upper()

        if s.cur_state==s.OPCODE:
            if all(c in accepted_chars for c in e):
                parsed_tokens.append(e)
                s.cur_state = s.WATCH_TOKEN_SET
                continue
            elif e == comment_delimiter:
                #Comment already preserved, nothing left to parse
                break
            else:
                raise ParserError("E::Syntax error: Invalid characters found in: %s" % e)

        elif s.cur_state==s.WATCH_COMMENT:
            #The only trigger character valid from here on out is comment_delimiter
            if e != comment_delimiter:
                raise ParserError("E::Expected comment (%s) or end of line but got %s" % (comment_delimiter, e))
            else:
                #Comment already preserved, nothing left to do
                break

        elif s.cur_state==s.WATCH_COMMA_COMMENT:
            if e==",":
                #Should be another token or set coming
                s.cur_state = s.WATCH_TOKEN_SET
                continue
            elif e==comment_delimiter:
                #Comment already preserved, nothing left to do
                break
            else:
                raise ParserError("E::Expected comma (,) or comment (%s) but got %s" % (comment_delimiter, e))

        elif s.cur_state in operand_states:
            #Need to know what came before. This is tricky because sometimes we will already
            #be filling a token stream and other times we'll just be starting one
            if len(token_stream) != 0:
                previous = token_stream[-1]
            else:
                previous = parsed_tokens[-1]

            if isinstance(previous, int):
                prev_isvalid = True
            else:
                #Raised error tells us this is not a valid token but has special characters in it.
                #TypeError covers a previous item that is a stream holding non-string elements.
                try:
                    validate_token(previous)
                    prev_isvalid = True
                except (ParserError, TypeError):
                    prev_isvalid = False

            if e=="EQU":
                if s.cur_state == s.WATCH_TOKEN_SET and len(parsed_tokens)==1 and len(token_stream)==0:
                    #Found EQU in the right place
                    parsed_tokens.append(e)
                    s.cur_state = s.TOKEN_VAR_DEF
                    continue
                raise ParserError("E::Unexpected EQU after %s" % str(previous))

            elif e=='[':
                if s.cur_state == s.WATCH_TOKEN_SET:
                    #Found [ in right place
                    token_stream.append(e)
                    s.cur_state = s.TOKEN_COLON_BRACKET
                    continue
                raise ParserError("E::Unexpected opening bracket ([) after %s" % str(previous))

            elif e==':':
                if s.cur_state == s.TOKEN_COLON_BRACKET and prev_isvalid:
                    #Found : in right place
                    token_stream.append(e)
                    s.cur_state = s.TOKEN_BRACKET
                    continue
                elif len(parsed_tokens)==1 and prev_isvalid:
                    #This is a label assignment; Nothing should come after this but a comment
                    #This colon is not inside of brackets so it isn't part of a token_stream; it's part of parsed_tokens
                    parsed_tokens.append(e)
                    s.cur_state = s.WATCH_COMMENT
                    continue
                raise ParserError("E::Unexpected colon (:) after %s" % str(previous))

            elif e=="]":
                if s.cur_state in [s.TOKEN_BRACKET, s.TOKEN_COLON_BRACKET] and prev_isvalid:
                    #Found ] in right place
                    token_stream.append(e)
                    parsed_tokens.append(token_stream)
                    token_stream = SmartToken()
                    s.cur_state = s.WATCH_COMMA_COMMENT
                    continue
                raise ParserError("E::Unexpected closing bracket (]) after %s" % str(previous))

            elif e==",":
                if s.cur_state == s.TOKEN_COMMA_COMMENT and prev_isvalid:
                    #Found , in right place
                    if len(token_stream) != 0:
                        parsed_tokens.append(token_stream)
                        token_stream = SmartToken()
                    parsed_tokens.append(e)
                    s.cur_state = s.WATCH_TOKEN_SET
                    continue
                raise ParserError("E::Unexpected comma (,) after %s" % str(previous))

            elif e==comment_delimiter:
                if s.cur_state in [s.TOKEN_COMMA_COMMENT, s.TOKEN_COMMENT, s.TOKEN_VAR_DEF] and prev_isvalid:
                    if len(token_stream) != 0:
                        parsed_tokens.append(token_stream)
                        token_stream = SmartToken() #Probably don't need to reset this but just in case
                    if s.cur_state==s.TOKEN_VAR_DEF:
                        if len(parsed_tokens) != 3:
                            raise ParserError("E::Wrong number of items in EQU statement")
                    #Found ; in right place, no need to parse more
                    break
                raise ParserError("E::Unexpected opening semicolon (;) after %s" % str(previous))

            elif e in modifying_keywords:
                if previous in token_preceders:
                    #Valid HIGH/LOW modifier
                    token_stream.append(e)
                    continue
                raise ParserError("E::Unexpected modifier %s after %s" % (e,str(previous)))

            elif e in modifying_operators:
                if previous in token_preceders:
                    #Hack: add 0 before leading operator (like -/+)
                    token_stream.append(0)
                    token_stream.append(e)
                    continue
                elif prev_isvalid:
                    #Operators can follow valid tokens
                    token_stream.append(e)
                    continue
                raise ParserError("E::Unexpected operator %s after %s" % (e,str(previous)))

            else:
                #Should have taken care of all modifiers and dividers
                #This will raise an error if it is not a valid token being added
                token_stream.append(validate_token(e))
                if s.cur_state == s.WATCH_TOKEN_SET:
                    #We got the token we were watching for so now look for more of this token, or a comma or comment
                    s.cur_state = s.TOKEN_COMMA_COMMENT
                continue

        else:
            raise ParserError("E::Unknown parser state machine cur_state value: %s" % s.cur_state)

    #Catch any token_streams that weren't written
    if len(token_stream) != 0:
        parsed_tokens.append(token_stream)

    if len(parsed_tokens) != 0:
        code.tokens = parsed_tokens
    return code
def validate_token(e):
    '''
    Validates the token part of the token stream (letters, numbers, underscore, period).

    Returns the token unchanged when every character is in accepted_chars.
    Raises ParserError if special characters like +,-,[,] are found.
    '''
    if all(c in accepted_chars for c in e):
        return e
    #The token is interpolated with % so it appears in the message itself
    raise ParserError("E::Illegal characters in token: %s" % e)
def format_unexpected_char_error(element,previous):
    '''
    Build the "unexpected X after Y" syntax error message.
    '''
    template = "E::Syntax error: Unexpected %s after %s"
    return template % (element, str(previous))
class TokenizerStates:
    '''
    State holder for the line parser's state machine.

    Every state name becomes an instance attribute whose value is its
    position in the list below; ``cur_state`` starts at OPCODE.
    '''
    def __init__(self):
        state_names = (
            "OPCODE",
            "TOKEN_COMMA_COMMENT",
            "TOKEN_COMMENT",
            "TOKEN_COLON_BRACKET",
            "TOKEN_BRACKET",
            "BRACKETS",
            "WATCH_TOKEN_SET",
            "WATCH_COMMA_COMMENT",
            "WATCH_COMMENT",
            "TOKEN_VAR_DEF",
        )
        for value, name in enumerate(state_names):
            setattr(self, name, value)
        self.cur_state = self.OPCODE
        self.reset_buffers()

    def reset_buffers(self):
        '''Start over with an empty bracket token buffer.'''
        self.bracket_token_buffer = list()
class SmartToken(list):
    '''
    A list of raw operand tokens (strings and ints) that can be resolved into
    a value once the symbol table is known.
    '''
    def __init__(self, data=None):
        #Populate the list itself so SmartToken(iterable) behaves like list(iterable)
        if data is not None:
            super().__init__(data)
        else:
            super().__init__()
        #Snapshot of the initial contents, kept only for backward compatibility;
        #resolve() works on the list itself
        self._stream = list(self)

    def resolve(self, symbols):
        '''
        Resolve this token stream against a symbol table.

        symbols: container of known symbol names; looked up with ``in`` and,
                 on a hit, indexed by name.

        Returns a list of resolved values when the stream is a bracketed set
        (``[a:b]``), otherwise a single value: an int for numbers and numeric
        symbols, or the upper-cased token string for anything else.
        LOW/MID/HIGH select bits 0-3, 4-7 and 8-11 of a number; + and - do
        integer arithmetic.

        Raises ParserError for modifiers/operators applied to non-numbers and
        for several values without a separator. Raises Exception for stream
        shapes the line parser should never produce.
        '''
        is_set = False
        found_named_reg = False
        prefix = None
        working_token = None
        resolved_set = None
        last_index = len(self) - 1

        for i, e in enumerate(self):
            #Elements are strings or ints (a 0 is inserted before a leading operator)
            if isinstance(e, str):
                e = e.upper()

            if e in named_registers+special_registers:
                found_named_reg = True

            if e=='[':
                if i != 0:
                    raise Exception("Unexpected opening bracket when parsing smart token. This should never happen")
                is_set = True
                resolved_set = list()
                continue
            elif e==":":
                #End of one member of the set; start collecting the next
                resolved_set.append(working_token)
                found_named_reg = False
                prefix = None
                working_token = None
                continue
            elif e=="]":
                if i != last_index:
                    raise Exception("Unexpected closing bracket when parsing smart token. This should never happen")
                resolved_set.append(working_token)
                found_named_reg = False
                prefix = None
                working_token = None
                continue
            elif e in modifying_keywords+modifying_operators:
                prefix = e
                continue

            #Everything that's not a symbol or a token has been filtered out by now
            if e in symbols:
                filtered_t = symbols[e]
            else:
                filtered_t = get_dec_or_token(e)

            if prefix is not None:
                if found_named_reg:
                    #If a named register is already in the working_token this will already be set to True
                    raise ParserError("E::Syntax error: Modifications like (%s) may only be performed on numbers but a named register was found." % prefix)
                if not isinstance(filtered_t, int):
                    raise ParserError("E::Syntax error: Modifications like (%s) may only be performed on numbers but %s was found." % (prefix, filtered_t))

                #Bit masks give the same nibbles as slicing a 12-bit string for
                #0..4095 and stay well-defined outside that range
                if prefix=="LOW":
                    working_token = filtered_t & 0xF
                elif prefix=="MID":
                    working_token = (filtered_t >> 4) & 0xF
                elif prefix=="HIGH":
                    working_token = (filtered_t >> 8) & 0xF
                else:
                    #+ or -: the left-hand side must already be a number
                    if not isinstance(working_token, int):
                        raise ParserError("E::Syntax error: Modifications like (%s) may only be performed on numbers but %s was found." % (prefix, working_token))
                    if prefix=="+":
                        working_token += filtered_t
                    else:
                        working_token -= filtered_t
                #A modifier applies to exactly one value
                prefix = None
            elif working_token is None:
                working_token = filtered_t
            else:
                raise ParserError("E::Syntax error: Expected a comma or operator between %s and %s" % (working_token, filtered_t))

        if is_set:
            if resolved_set is not None:
                return resolved_set
        elif working_token is not None:
            return working_token
        raise Exception("Error, SmartToken.resolve() was unable to finish and didn't raise ParserError(). This should never happen.")
def get_dec_or_token(token):
    '''
    Convert a token to a number when it looks like one.

    Takes a string and returns an int if the string is a number: decimal, or
    hexadecimal/binary when it starts with 0x/0b (case-insensitive) followed
    by at least one more character. Any other string is returned unchanged.
    Values that are not strings (ints, lists) are also returned unchanged.
    '''
    if isinstance(token, int):
        return token
    if not isinstance(token, str):
        #e.g. an already-resolved set of values; there is nothing to convert
        return token

    base = 10
    if len(token) > 2:
        marker = token[:2].lower()
        if marker == "0x":
            base = 16
        elif marker == "0b":
            base = 2
    try:
        #Try to return it as a number
        return int(token, base)
    except ValueError:
        #Otherwise it must be a token
        return token
def checksum(hexarray):
    '''
    Returns 16-bit checksum.

    Param: hexarray is an array of byte values. There must be an even number,
           each pair arranged with low byte first, high second (a trailing
           unpaired value is ignored).

    Return: checksum as two byte values, low byte first, high second. The sum
            of all 16-bit words is reduced modulo 0xFFFF.
    '''
    total = 0
    values = iter(hexarray)
    #Zipping one iterator with itself yields consecutive (low, high) pairs
    for low, high in zip(values, values):
        total += low + (high*256)
    total %= 0xFFFF
    byte_h, byte_l = divmod(total, 256)
    return [byte_l, byte_h]
def generate_hex(program_list, h=header):
    '''
    Build the byte values of a hex file for an assembled program.

    program_list: list of 12-character binary strings, one per program word
    h:            header bytes placed in front of the message

    Layout: header, program length (as one word), every program word, then
    the checksum of length + program. Each word becomes two byte values via
    twelve_bit_to_hex_bytes().
    '''
    word_count_bits = format(len(program_list), "012b")
    payload = twelve_bit_to_hex_bytes(word_count_bits)
    for word in program_list:
        payload.extend(twelve_bit_to_hex_bytes(word))
    #The checksum covers the length word and the program, not the header
    payload.extend(checksum(payload))
    return h + payload
def twelve_bit_to_hex_bytes(twelvebit):
    '''
    Takes a string that is a 12-bit binary number.
    Returns two byte values as ints, low byte first: the last eight bits,
    then the first four bits.
    '''
    low_bits = twelvebit[-8:]
    high_bits = twelvebit[:4]
    return [int(low_bits, 2), int(high_bits, 2)]
def args_rxry(tokens,opcode):
    '''
    Encode an instruction of the form ``OP RX,RY``.
    Used by ADD ADC SUB SBB OR AND XOR MOV.

    Raises ParserError unless both operands are named registers.
    '''
    arg_count_test(len(tokens),3)
    reg_x, reg_y = tokens[1], tokens[2]
    if not (reg_x in named_registers and reg_y in named_registers):
        raise ParserError("E::This opcode requires register names as arguments")
    return format_output(opcode, get_reg_binary(reg_x), get_reg_binary(reg_y))
def args_r0n(tokens,opcode):
    '''
    Encode an instruction of the form ``OP R0,N``.
    Used by CP ADD OR AND XOR RET.

    Raises ParserError unless the first operand is R0 and the second a number.
    '''
    arg_count_test(len(tokens),3)
    if tokens[1] != "R0":
        raise ParserError("E::This opcode requires R0 as the first argument")
    if not is_int(tokens[2]):
        raise ParserError("E::This opcode requires a number as the second argument")
    return format_output("0000", opcode, get_four_bit_binary(tokens[2]))
def args_ry(tokens,opcode):
    '''
    Encode an instruction of the form ``OP RY``.
    Used by INC DEC DSZ RRC.

    Raises ParserError unless the operand is a named register.
    '''
    arg_count_test(len(tokens),2)
    register = tokens[1]
    if register in named_registers:
        return format_output("0000", opcode, get_reg_binary(register))
    raise ParserError("E::This opcode requires a register name as the argument")
def args_rgm(tokens,opcode):
    '''
    Encode a bit instruction of the form ``OP RG,M``.
    Used by BIT BSET BCLR BTG.

    The last word is the low two bits of the register number followed by the
    low two bits of M. Raises ParserError unless RG is R0..R3 and M is 0..3.
    '''
    arg_count_test(len(tokens),3)
    register, bit_num = tokens[1], tokens[2]
    if register not in ["R0","R1","R2","R3"]:
        raise ParserError("E::This opcode requires R0, R1, R2, or R3 as the first argument")
    if not (is_int(bit_num) and 0 <= bit_num < 4):
        raise ParserError("E::This opcode requires a number [0..3] as the second argument")
    reg_bits = get_reg_binary(register)[2:]
    num_bits = get_four_bit_binary(bit_num)[2:]
    return format_output("0000", opcode, reg_bits+num_bits)
def args_go(tokens,trigger_reg):
    '''
    Encode the two-word GOTO/GOSUB sequence for a 12-bit program address.

    The first word loads the high and middle nibbles of the address; the
    second writes the low nibble to the register given by trigger_reg
    (a 4-bit binary string).

    Raises ParserError when the argument is not a number in [0..4095], which
    includes a label or symbol that could not be resolved.
    '''
    arg_count_test(len(tokens),2)
    reg_value = tokens[1]
    #An unresolved symbol arrives here as a string; report it instead of
    #failing on the numeric comparison below
    if not is_int(reg_value):
        raise ParserError("E::This opcode requires a number or a defined label as the argument but got %s" % str(reg_value))
    if not 0 <= reg_value < 4096:
        raise ParserError("E::Register value is out of range (0 <= reg_value < 4096): %d" % reg_value)
    word_h, word_m, word_l = pc_addr_to_bin(reg_value)
    return format_output("1110", word_h, word_m) + "\n" + format_output("1001",trigger_reg,word_l)
def opcode_add(tokens):
    '''
    Assemble ADD in either of its two forms: ``ADD R0,N`` or ``ADD RX,RY``.
    '''
    #The length guard lets a missing operand reach the argument-count check
    #in args_rxry instead of failing on tokens[2]
    if len(tokens) > 2 and is_int(tokens[2]):
        #ADD R0,N
        return args_r0n(tokens,"0001")
    #ADD RX,RY
    return args_rxry(tokens,"0001")
def opcode_adc(tokens):
    '''Assemble ``ADC RX,RY``.'''
    opcode_bits = "0010"
    return args_rxry(tokens, opcode_bits)
def opcode_sub(tokens):
    '''Assemble ``SUB RX,RY``.'''
    opcode_bits = "0011"
    return args_rxry(tokens, opcode_bits)
def opcode_sbb(tokens):
    '''Assemble ``SBB RX,RY``.'''
    opcode_bits = "0100"
    return args_rxry(tokens, opcode_bits)
def opcode_or(tokens):
    '''
    Assemble OR in either of its two forms: ``OR R0,N`` or ``OR RX,RY``.
    '''
    #The length guard lets a missing operand reach the argument-count check
    #in args_rxry instead of failing on tokens[2]
    if len(tokens) > 2 and is_int(tokens[2]):
        #OR R0,N
        return args_r0n(tokens,"0101")
    #OR RX,RY
    return args_rxry(tokens,"0101")
def opcode_and(tokens):
    '''
    Assemble AND in either of its two forms: ``AND R0,N`` or ``AND RX,RY``.
    '''
    #The length guard lets a missing operand reach the argument-count check
    #in args_rxry instead of failing on tokens[2]
    if len(tokens) > 2 and is_int(tokens[2]):
        #AND R0,N
        return args_r0n(tokens,"0110")
    #AND RX,RY
    return args_rxry(tokens,"0110")
def opcode_xor(tokens):
    '''
    Assemble XOR in either of its two forms: ``XOR R0,N`` or ``XOR RX,RY``.
    '''
    #The length guard lets a missing operand reach the argument-count check
    #in args_rxry instead of failing on tokens[2]
    if len(tokens) > 2 and is_int(tokens[2]):
        #XOR R0,N
        return args_r0n(tokens,"0111")
    #XOR RX,RY
    return args_rxry(tokens,"0111")
def opcode_mov(tokens):
    '''
    Assemble every form of MOV:

        MOV PC,[NN]    MOV R0,[NN]    MOV R0,[X:Y]
        MOV [NN],R0    MOV [X:Y],R0   MOV RX,RY     MOV RX,N

    A bracketed operand arrives as a list of resolved values.
    Raises ParserError for any operand combination that matches none of
    these forms.
    '''
    arg_count_test(len(tokens),3)
    dest, src = tokens[1], tokens[2]

    if isinstance(dest, list) or isinstance(src, list):
        #Must be instruction containing a set of brackets
        if dest == "PC":
            #MOV PC,NN
            if all(is_int(i) for i in src):
                word_high, word_low = get_eight_bit_binary(src)
                return format_output("1110", word_high, word_low)
            raise ParserError("E::Expected literal number for MOV PC, NN")

        if dest == "R0":
            if all(is_int(i) for i in src):
                #MOV R0,[NN]
                word_high, word_low = get_eight_bit_binary(src)
                return format_output("1101", word_high, word_low)
            if len(src) == 2 and all(i in named_registers for i in src):
                #MOV R0,[XY]
                return format_output("1011", get_reg_binary(src[0]), get_reg_binary(src[1]))
            raise ParserError("E::Type mismatch for values inside brackets")

        if src == "R0":
            if all(is_int(i) for i in dest):
                #MOV [NN],R0
                word_high, word_low = get_eight_bit_binary(dest)
                return format_output("1100", word_high, word_low)
            if len(dest) == 2 and all(i in named_registers for i in dest):
                #MOV [XY],R0
                return format_output("1010", get_reg_binary(dest[0]), get_reg_binary(dest[1]))
            raise ParserError("E::Type mismatch for values inside brackets")

        #Brackets are only valid together with R0 or PC
        raise ParserError("E::MOV with a bracketed operand requires R0 or PC as the other operand")

    #Catch edge case syntax error
    if dest == "PC":
        raise ParserError("E::Syntax error, numeric literal must be in brackets for PC,[NN]")

    #MOV RX,RY
    if dest in named_registers and src in named_registers:
        return args_rxry(tokens,"1000")

    #MOV RX,N
    if is_int(src):
        if dest in named_registers:
            return format_output("1001", get_reg_binary(dest), get_four_bit_binary(src))
        raise ParserError("E::Expected register name for first argument of MOV RX,N")

    #e.g. an unresolved symbol as the source operand
    raise ParserError("E::Unrecognized operands for MOV: %s,%s" % (str(dest), str(src)))
def opcode_jr(tokens):
    '''
    Assemble ``JR [NN]``. The bracketed operand is either one signed value or
    two 4-bit values (range checks are done by get_eight_bit_binary).

    Raises ParserError when the operand is not a bracketed set of numbers.
    '''
    arg_count_test(len(tokens),2)
    operand = tokens[1]
    #A bare number or an unresolved symbol is not iterable as a set of
    #numbers, so check the shape before looking at the elements
    if isinstance(operand, list) and operand and all(is_int(i) for i in operand):
        word_high, word_low = get_eight_bit_binary(operand, signed=True)
        return format_output("1111", word_high, word_low)
    raise ParserError("E::This opcode requires two numbers as arguments")
def opcode_cp(tokens):
    '''Assemble ``CP R0,N``.'''
    opcode_bits = "0000"
    return args_r0n(tokens, opcode_bits)
def opcode_inc(tokens):
    '''Assemble ``INC RY``.'''
    opcode_bits = "0010"
    return args_ry(tokens, opcode_bits)
def opcode_dec(tokens):
    '''Assemble ``DEC RY``.'''
    opcode_bits = "0011"
    return args_ry(tokens, opcode_bits)
def opcode_dsz(tokens):
    '''Assemble ``DSZ RY``.'''
    opcode_bits = "0100"
    return args_ry(tokens, opcode_bits)
def opcode_exr(tokens):
    '''
    Assemble ``EXR N``.

    Raises ParserError unless the single argument is a number.
    '''
    arg_count_test(len(tokens),2)
    count = tokens[1]
    if not is_int(count):
        raise ParserError("E::This opcode requires a number as the argument")
    return format_output("0000", "1000", get_four_bit_binary(count))
def opcode_bit(tokens):
    '''Assemble ``BIT RG,M``.'''
    opcode_bits = "1001"
    return args_rgm(tokens, opcode_bits)
def opcode_bset(tokens):
    '''Assemble ``BSET RG,M``.'''
    opcode_bits = "1010"
    return args_rgm(tokens, opcode_bits)
def opcode_bclr(tokens):
    '''Assemble ``BCLR RG,M``.'''
    opcode_bits = "1011"
    return args_rgm(tokens, opcode_bits)
def opcode_btg(tokens):
    '''Assemble ``BTG RG,M``.'''
    opcode_bits = "1100"
    return args_rgm(tokens, opcode_bits)
def opcode_rrc(tokens):
    '''Assemble ``RRC RY``.'''
    opcode_bits = "1101"
    return args_ry(tokens, opcode_bits)
def opcode_ret(tokens):
    '''Assemble ``RET R0,N``.'''
    opcode_bits = "1110"
    return args_r0n(tokens, opcode_bits)
def opcode_skip(tokens):
    '''
    Assemble ``SKIP F,M``.

    F is one of the first four special registers (C, NC, Z, NZ) and is
    encoded as its two-bit index; M is a number in [0..3] encoded as two
    bits. Raises ParserError when either operand is invalid.
    '''
    arg_count_test(len(tokens),3)
    flags = special_registers[:4]
    flag, count = tokens[1], tokens[2]
    if flag not in flags:
        raise ParserError("E::This opcode requires %s, %s, %s, or %s as the first argument" % flags)
    if not (is_int(count) and 0 <= count < 4):
        raise ParserError("E::This opcode requires a number [0..3] as the second argument")
    flag_bits = format(special_registers.index(flag),"02b")
    count_bits = get_four_bit_binary(count)[2:]
    return format_output("0000", "1111", flag_bits + count_bits)
def opcode_goto(tokens):
    '''Assemble GOTO: a two-word sequence whose second word writes to PCL.'''
    trigger_reg = get_reg_binary("PCL")
    return args_go(tokens, trigger_reg)
def opcode_gosub(tokens):
    '''Assemble GOSUB: a two-word sequence whose second word writes to JSR.'''
    trigger_reg = get_reg_binary("JSR")
    return args_go(tokens, trigger_reg)
def opcode_org(tokens,linenum):
    '''
    Assemble ORG by padding with all-zero words from the current address
    (linenum) up to the requested address.

    The argument should have been validated as a number within range and
    greater than the current program memory register when the symbols table
    was calculated in get_tokenized_code().

    Returns the padding words separated by newlines.
    '''
    arg_count_test(len(tokens),2)
    lines_to_fill = tokens[1] - linenum
    return "\n".join(["0000 0000 0000"] * lines_to_fill)
def is_int(val):
    '''Return True only when val is exactly an int (subclasses such as bool do not count).'''
    return type(val) is int
def get_four_bit_binary(value):
    '''
    Convert int to 4-bit binary string.
    Raises ParserError if the value is outside [0..15].
    '''
    if 0 <= value < 16:
        return format(value, "04b")
    raise ParserError("E::Literal value out of range. Expected [0..15]")
def get_eight_bit_binary(brackets, signed=False):
    '''
    Convert array containing numbers to two 4-bit binary strings (high, low).

    A single value is treated as one byte: [0..255] when unsigned or
    [-128..127] when signed (encoded as two's complement). Otherwise the
    first two values are each encoded as a 4-bit value in [0..15].

    Raises ParserError if a value is out of range.
    '''
    if len(brackets) == 1:
        value = brackets[0]
        if signed:
            #Upper bound is inclusive so that 127 is accepted, as the message states
            if not -128 <= value <= 127:
                raise ParserError("E::Literal value out of range. Expected [-128..127]")
        elif not 0 <= value < 256:
            raise ParserError("E::Literal value out of range. Expected [0..255]")
        #Masking to 8 bits gives the two's complement form of negative values
        bin_byte = format(value & 0xFF,"08b")
        return bin_byte[:4], bin_byte[4:]

    if any(not 0 <= i < 16 for i in brackets):
        raise ParserError("E::Literal value out of range. Expected [0..15]")
    return format(brackets[0], "04b"), format(brackets[1], "04b")
def get_reg_binary(reg_name):
    '''Returns the 4-bit binary string for a named register (its index in named_registers).'''
    reg_number = named_registers.index(reg_name)
    return format(reg_number, "04b")
def arg_count_test(actual, expected):
    '''
    Raise ParserError when the number of tokens differs from what is expected.

    Token count includes opcode so the arg count reported is one less.
    '''
    if actual == expected:
        return
    raise ParserError("E::Expected %d arguments for this opcode but got %d" % (expected-1, actual-1))
def format_output(*arg):
    '''
    Join the given words into one output line.

    Argument list must all be strings. Words are separated by a single space
    when show_wordspace or show_verbose is set, otherwise they are run
    together.
    '''
    separator = " " if (show_wordspace or show_verbose) else ""
    return separator.join("%s" % word for word in arg)
def pc_addr_to_bin(pc_num):
    '''
    Format a program address as a zero-padded 12-bit binary string and
    return it split into (high, middle, low) four-character pieces.
    '''
    bits = format(pc_num, "012b")
    high, middle, low = bits[:4], bits[4:8], bits[8:]
    return (high, middle, low)
def print_output(binary,ln,cm):
    '''
    Print one line of output.

    binary: the text to show (machine code or an echoed source line)
    ln:     line number to print in front, followed by a tab, or None
    cm:     comment to print at the end, preceded by a tab, or None
    '''
    parts = []
    if ln is not None:
        parts.append(format(ln, " 4") + '\t')
    parts.append(binary)
    if cm is not None:
        parts.append('\t' + cm)
    print("".join(parts))
def print_error(e, line_num, line):
    '''Print an error (exception or message) followed by the line number and source line it refers to.'''
    report = "%s\n\tLine %d:\t%s" % (e, line_num, line)
    print(report)
def read_asm_file(filename):
    '''Return the entire contents of a text file as one string.'''
    with open(filename, 'r') as asm_file:
        return asm_file.read()
class CodePack:
    '''
    Container for one parsed source line: its tokens, its comment and the
    original source text. Every field defaults to None.
    '''
    def __init__(self, tokens=None, comment=None, source=None):
        self.tokens, self.comment, self.source = tokens, comment, source
class ParserError(Exception):
    '''Raised for problems found in the assembly source being processed.'''
#Dispatch table: upper-case mnemonic -> function that assembles it.
#Every handler takes the resolved token list; ORG additionally takes the
#current program address. The trailing numbers are notes kept from the
#original source.
instructions = {
    #Arithmetic and logic
    "ADD": opcode_add,      # 17
    "ADC": opcode_adc,
    "SUB": opcode_sub,
    "SBB": opcode_sbb,
    "OR": opcode_or,        #21
    "AND": opcode_and,      #22
    "XOR": opcode_xor,      #23
    #Data movement and jumps
    "MOV": opcode_mov,      #9,10,11,12,13,14
    "JR": opcode_jr,
    #Encodings that start with the word 0000
    "CP": opcode_cp,
    "INC": opcode_inc,
    "DEC": opcode_dec,
    "DSZ": opcode_dsz,
    "EXR": opcode_exr,
    "BIT": opcode_bit,
    "BSET": opcode_bset,
    "BCLR": opcode_bclr,
    "BTG": opcode_btg,
    "RRC": opcode_rrc,
    "RET": opcode_ret,
    "SKIP": opcode_skip,
    #Directives that expand to more than one word
    "GOTO": opcode_goto,
    "GOSUB": opcode_gosub,
    "ORG": opcode_org,
}

#Tokens after which a modifier (LOW/MID/HIGH) or a leading +/- may appear
token_preceders = list(instructions) + ["EQU", "[", ":", ","]
def main():
    '''
    Command-line entry point: parse the options, assemble the given file and
    write the result next to it with a .hex extension.

    Choosing any specific display option (-c, -n, -s, -w) turns off the
    default verbose output; -q suppresses the readout entirely.
    '''
    import os

    global show_output
    global show_verbose
    global show_wordspace
    global show_linenums
    global show_comments

    print("Four-Bit-Badge Assembler version %s\n" % __version__)

    parser = argparse.ArgumentParser()
    parser.add_argument("asmfile", help="assembly language file to be processed")
    parser.add_argument("-q", help="Write to file without showing any human-readable output", action="store_true")
    parser.add_argument("-c", help="enable comments in readout", action="store_true")
    parser.add_argument("-n", help="enable line numbers in readout", action="store_true")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-s", help="Show 12-bit instructions with spaces between words", action="store_true")
    group.add_argument("-w", help="Show 12-bit instructions without spaces between words", action="store_true")
    args = parser.parse_args()

    if args.q:
        show_output = False
    if args.c:
        show_verbose = False
        show_comments = True
    if args.n:
        show_verbose = False
        show_linenums = True
    if args.s:
        show_verbose = False
        show_wordspace = True
    if args.w:
        show_verbose = False
        show_wordspace = False

    #splitext only looks at the final path component, so a dot in a
    #directory name (e.g. "proj.v2/prog") no longer truncates the path
    outfile = os.path.splitext(args.asmfile)[0] + ".hex"

    if parse_asm(read_asm_file(args.asmfile),hexfile_out=outfile) == True:
        print("\nSuccessfully wrote hex file: %s\n" % outfile)


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import argparse | |
| from fbb_as import checksum | |
| __version__ = "0.6dev" | |
| header = [0x00, 0xFF, 0x00, 0xFF, 0xA5, 0xC3] | |
| #Output options | |
| show_output = True | |
| show_verbose = True #Prints all optional things (spaces, comments, empty lines) | |
| show_words = False | |
| show_wordspace = True | |
| show_linenums = False | |
| show_source = False | |
def is_valid(hexarray, h=header):
    '''
    Return True if the byte list has the header and a correct checksum.

    Param: hexarray is a list of byte values. There must be an even number of
           elements, each pair arranged with low byte first, high second.
           It must begin with the header values and end with a valid
           16-bit checksum as a low-byte, high-byte pair.
           h is the expected header.

    Returns True when the checksum of everything between header and checksum
    matches the last two bytes, False otherwise.

    Raises Exception when the length is odd or the header does not match.
    '''
    header_len = len(h)
    hexarray_len = len(hexarray)
    if hexarray_len%2 != 0:
        raise Exception("Binary message must be an even number of bytes but %d were found." % hexarray_len)
    #Compare exactly as many bytes as the header has, so a caller-supplied
    #header of another length is checked correctly
    if hexarray[:header_len] != h:
        raise Exception("Binary message must begin with header: %s but found: %s" % (str(h),str(hexarray)))
    message = hexarray[header_len:-2]
    return checksum(message) == hexarray[-2:]
def read_hex_file(filename):
    '''Read a binary file and return its contents as a list of byte values (ints).'''
    with open(filename, mode='rb') as hex_file:
        raw = hex_file.read()
    return list(raw)
def write_asm_file(filename, contents):
    """Write every item of ``contents`` to ``filename``, one per line.

    Each item is rendered with ``%s`` and followed by a newline. Always
    returns True once the file has been written.
    """
    with open(filename, "w") as asm_file:
        for entry in contents:
            asm_file.write("%s\n" % entry)
    return True
def disassemble(hexarray, h=header, print_output=True, outfile=None):
    """Turn a validated hex image into lines of assembly listing.

    Args:
        hexarray: List of byte values making up the whole image (header,
            payload and trailing checksum pair).
        h: Header the image is expected to start with.
        print_output: When true, print every formatted line.
        outfile: Optional path; when given, the formatted lines are also
            written to this file.

    Returns:
        The result of write_asm_file() when ``outfile`` is given,
        otherwise None.

    Raises:
        Exception: If is_valid() rejects the image or reports a checksum
            mismatch.
    """
    # Validate against the same header that is used for slicing below.
    if not is_valid(hexarray, h):
        raise Exception("Data has an invalid checksum")

    # Drop the header plus the two bytes that follow it, and the trailing
    # checksum pair.
    # NOTE(review): the two skipped bytes are presumably a length field --
    # confirm against the assembler's output format.
    message = hexarray[len(h) + 2:-2]

    output_buffer = []
    # Bytes come in (low, high) pairs. The counter is driven by enumerate so
    # it advances per instruction instead of being reset on every iteration.
    pairs = zip(message[0::2], message[1::2])
    for line_number, (low, high) in enumerate(pairs):
        byte_l = format(low, "08b")
        byte_h = format(high, "08b")
        word_l = byte_l[4:]
        word_m = byte_l[:4]
        word_h = byte_h[4:]

        # A zero high word selects the extended table, keyed by the middle word.
        if word_h == "0000":
            source = excodes[word_m](word_h, word_m, word_l)
        else:
            source = opcodes[word_h](word_h, word_m, word_l)

        this_line = format_output_line(line_number, word_h, word_m, word_l, source)
        if print_output:
            print(this_line)
        if outfile is not None:
            output_buffer.append(this_line)

    if outfile is not None:
        return write_asm_file(outfile, output_buffer)
def format_output_line(ln, word_h, word_m, word_l, source):
    """Build one listing line from the enabled output columns.

    The line number, the three instruction words and the source text are
    each included when their module-level option (or show_verbose) is set.
    Trailing whitespace is stripped from the result.
    """
    columns = []
    if show_linenums or show_verbose:
        columns.append("%s\t" % format(ln, " 5d"))
    if show_words or show_verbose:
        spaced = show_wordspace or show_verbose
        template = "%s %s %s\t" if spaced else "%s%s%s\t"
        columns.append(template % (word_h, word_m, word_l))
    if show_source or show_verbose:
        columns.append("%s" % source)
    return "".join(columns).rstrip()
def args_rxry(instruction, oper_x, oper_y):
    """Render ``<instruction> <RX>,<RY>``.

    oper_x and oper_y are binary-digit strings; each is converted to an
    integer and used as an index into named_registers.
    """
    reg_x = named_registers[int(oper_x, 2)]
    reg_y = named_registers[int(oper_y, 2)]
    return "%s %s,%s" % (instruction, reg_x, reg_y)
def args_ry(instruction, oper_y):
    """Render ``<instruction> <RY>``; oper_y is a binary-digit string indexing named_registers."""
    register = named_registers[int(oper_y, 2)]
    return "%s %s" % (instruction, register)
def args_r0n(instruction, oper_y):
    """Render ``<instruction> R0,0b<oper_y>`` with oper_y inserted verbatim."""
    operands = (instruction, oper_y)
    return "%s R0,0b%s" % operands
def args_rgm(instruction, oper_y):
    """Render ``<instruction> <RG>,0b<M>``.

    The first two characters of oper_y (binary digits) index
    named_registers; the remaining characters are emitted verbatim after
    the ``0b`` prefix.
    """
    reg_bits, rest_bits = oper_y[:2], oper_y[2:]
    return "%s %s,0b%s" % (instruction, named_registers[int(reg_bits, 2)], rest_bits)
def op_add_rxry(word_h, word_m, word_l):
    """Decode ``ADD RX,RY`` (registered as opcodes["0001"]); word_h is unused."""
    return args_rxry("ADD", oper_x=word_m, oper_y=word_l)
def op_adc(word_h, word_m, word_l):
    """Decode ``ADC RX,RY`` (registered as opcodes["0010"]); word_h is unused."""
    return args_rxry("ADC", oper_x=word_m, oper_y=word_l)
def op_sub(word_h, word_m, word_l):
    """Decode ``SUB RX,RY`` (registered as opcodes["0011"]); word_h is unused."""
    return args_rxry("SUB", oper_x=word_m, oper_y=word_l)
def op_sbb(word_h, word_m, word_l):
    """Decode ``SBB RX,RY`` (registered as opcodes["0100"]); word_h is unused."""
    return args_rxry("SBB", oper_x=word_m, oper_y=word_l)
def op_or_rxry(word_h, word_m, word_l):
    """Decode ``OR RX,RY`` (registered as opcodes["0101"]); word_h is unused."""
    return args_rxry("OR", oper_x=word_m, oper_y=word_l)
def op_and_rxry(word_h, word_m, word_l):
    """Decode ``AND RX,RY`` (registered as opcodes["0110"]); word_h is unused."""
    return args_rxry("AND", oper_x=word_m, oper_y=word_l)
def op_xor_rxry(word_h, word_m, word_l):
    """Decode ``XOR RX,RY`` (registered as opcodes["0111"]); word_h is unused."""
    return args_rxry("XOR", oper_x=word_m, oper_y=word_l)
def op_mov_rxry(word_h, word_m, word_l):
    """Decode ``MOV RX,RY`` (registered as opcodes["1000"]); word_h is unused."""
    return args_rxry("MOV", oper_x=word_m, oper_y=word_l)
def op_mov_rxn(word_h, word_m, word_l):
    """Decode ``MOV RX,0b<word_l>`` (registered as opcodes["1001"]).

    word_m (binary digits) indexes named_registers; word_l is emitted
    verbatim. word_h is unused.
    """
    target = named_registers[int(word_m, 2)]
    return "MOV %s,0b%s" % (target, word_l)
def op_mov_xyr0(word_h, word_m, word_l):
    """Decode ``MOV [RX:RY],R0`` (registered as opcodes["1010"]).

    word_m and word_l (binary digits) each index named_registers; word_h
    is unused.
    """
    reg_x = named_registers[int(word_m, 2)]
    reg_y = named_registers[int(word_l, 2)]
    return "MOV [%s:%s],R0" % (reg_x, reg_y)
def op_mov_r0xy(word_h, word_m, word_l):
    """Decode ``MOV R0,[RX:RY]`` (registered as opcodes["1011"]).

    word_m and word_l (binary digits) each index named_registers; word_h
    is unused.
    """
    reg_x = named_registers[int(word_m, 2)]
    reg_y = named_registers[int(word_l, 2)]
    return "MOV R0,[%s:%s]" % (reg_x, reg_y)
def op_mov_nnr0(word_h, word_m, word_l):
    """Decode ``MOV [0b<word_m>:0b<word_l>],R0`` (registered as opcodes["1100"]); word_h is unused."""
    nibbles = (word_m, word_l)
    return "MOV [0b%s:0b%s],R0" % nibbles
def op_mov_r0nn(word_h, word_m, word_l):
    """Decode ``MOV R0,[0b<word_m>:0b<word_l>]`` (registered as opcodes["1101"]); word_h is unused."""
    nibbles = (word_m, word_l)
    return "MOV R0,[0b%s:0b%s]" % nibbles
def op_mov_pcnn(word_h, word_m, word_l):
    """Decode ``MOV PC,[0b<word_m>:0b<word_l>]`` (registered as opcodes["1110"]); word_h is unused."""
    nibbles = (word_m, word_l)
    return "MOV PC,[0b%s:0b%s]" % nibbles
def op_jr(word_h, word_m, word_l):
    """Decode ``JR [0b<word_m>:0b<word_l>]`` (registered as opcodes["1111"]); word_h is unused."""
    nibbles = (word_m, word_l)
    return "JR [0b%s:0b%s]" % nibbles
def op_cp(word_h, word_m, word_l):
    """Decode ``CP R0,0b<word_l>`` (registered as excodes["0000"]); word_h and word_m are unused."""
    return args_r0n("CP", oper_y=word_l)
def op_add_r0n(word_h, word_m, word_l):
    """Decode ``ADD R0,0b<word_l>`` (registered as excodes["0001"]); word_h and word_m are unused."""
    return args_r0n("ADD", oper_y=word_l)
def op_inc(word_h, word_m, word_l):
    """Decode ``INC RY`` (registered as excodes["0010"]); word_h and word_m are unused."""
    return args_ry("INC", oper_y=word_l)
def op_dec(word_h, word_m, word_l):
    """Decode ``DEC RY`` (registered as excodes["0011"]); word_h and word_m are unused."""
    return args_ry("DEC", oper_y=word_l)
def op_dsz(word_h, word_m, word_l):
    """Decode ``DSZ RY`` (registered as excodes["0100"]); word_h and word_m are unused."""
    return args_ry("DSZ", oper_y=word_l)
def op_or_r0n(word_h, word_m, word_l):
    """Decode ``OR R0,0b<word_l>`` (registered as excodes["0101"]); word_h and word_m are unused."""
    return args_r0n("OR", oper_y=word_l)
def op_and_r0n(word_h, word_m, word_l):
    """Decode ``AND R0,0b<word_l>`` (registered as excodes["0110"]); word_h and word_m are unused."""
    return args_r0n("AND", oper_y=word_l)
def op_xor_r0n(word_h, word_m, word_l):
    """Decode ``XOR R0,0b<word_l>`` (registered as excodes["0111"]); word_h and word_m are unused."""
    return args_r0n("XOR", oper_y=word_l)
def op_exr(word_h, word_m, word_l):
    """Decode ``EXR 0b<word_l>`` (registered as excodes["1000"]).

    word_l is a string of binary digits. It is emitted with a ``0b``
    prefix, as every other immediate operand in this disassembler is, so
    the value is not misread as a decimal number (e.g. "0101" as 101).
    word_h and word_m are unused.
    """
    return "EXR 0b%s" % word_l
def op_bit(word_h, word_m, word_l):
    """Decode ``BIT RG,0b<M>`` (registered as excodes["1001"]); word_h and word_m are unused."""
    return args_rgm("BIT", oper_y=word_l)
def op_bset(word_h, word_m, word_l):
    """Decode ``BSET RG,0b<M>`` (registered as excodes["1010"]); word_h and word_m are unused."""
    return args_rgm("BSET", oper_y=word_l)
def op_bclr(word_h, word_m, word_l):
    """Decode ``BCLR RG,0b<M>`` (registered as excodes["1011"]); word_h and word_m are unused."""
    return args_rgm("BCLR", oper_y=word_l)
def op_btg(word_h, word_m, word_l):
    """Decode ``BTG RG,0b<M>`` (registered as excodes["1100"]); word_h and word_m are unused."""
    return args_rgm("BTG", oper_y=word_l)
def op_rrc(word_h, word_m, word_l):
    """Decode ``RRC RY`` (registered as excodes["1101"]); word_h and word_m are unused."""
    return args_ry("RRC", oper_y=word_l)
def op_ret(word_h, word_m, word_l):
    """Decode ``RET R0,0b<word_l>`` (registered as excodes["1110"]); word_h and word_m are unused."""
    return args_r0n("RET", oper_y=word_l)
def op_skip(word_h, word_m, word_l):
    """Decode ``SKIP <flag>,0b<bits>`` (registered as excodes["1111"]).

    The first two characters of word_l (binary digits) index
    special_registers to pick the flag name; the remaining characters are
    emitted verbatim. word_h and word_m are unused.
    """
    flag_bits, tail_bits = word_l[:2], word_l[2:]
    return "SKIP %s,0b%s" % (special_registers[int(flag_bits, 2)], tail_bits)
# Handlers for the primary opcodes, ordered by the value of the high
# instruction word starting at 0b0001 (a high word of 0b0000 is dispatched
# through excodes instead).
_OPCODE_HANDLERS = (
    op_add_rxry,  # 0001
    op_adc,       # 0010
    op_sub,       # 0011
    op_sbb,       # 0100
    op_or_rxry,   # 0101
    op_and_rxry,  # 0110
    op_xor_rxry,  # 0111
    op_mov_rxry,  # 1000
    op_mov_rxn,   # 1001
    op_mov_xyr0,  # 1010
    op_mov_r0xy,  # 1011
    op_mov_nnr0,  # 1100
    op_mov_r0nn,  # 1101
    op_mov_pcnn,  # 1110
    op_jr,        # 1111
)

# Maps the 4-character binary string of the high word to its handler.
opcodes = {
    format(code, "04b"): handler
    for code, handler in enumerate(_OPCODE_HANDLERS, start=1)
}
# Handlers for the extended opcodes (used when the high instruction word is
# 0b0000), ordered by the value of the middle word starting at 0b0000.
_EXCODE_HANDLERS = (
    op_cp,       # 0000
    op_add_r0n,  # 0001
    op_inc,      # 0010
    op_dec,      # 0011
    op_dsz,      # 0100
    op_or_r0n,   # 0101
    op_and_r0n,  # 0110
    op_xor_r0n,  # 0111
    op_exr,      # 1000
    op_bit,      # 1001
    op_bset,     # 1010
    op_bclr,     # 1011
    op_btg,      # 1100
    op_rrc,      # 1101
    op_ret,      # 1110
    op_skip,     # 1111
)

# Maps the 4-character binary string of the middle word to its handler.
excodes = {
    format(code, "04b"): handler
    for code, handler in enumerate(_EXCODE_HANDLERS)
}
# Register names indexed by their 4-bit operand value: R0-R9 occupy
# indexes 0-9 and are followed by six named entries at indexes 10-15.
named_registers = tuple("R%d" % number for number in range(10)) + (
    "OUT",
    "IN",
    "JSR",
    "PCL",
    "PCM",
    "PCH",
)
# Important: don't change the index positions of C, NC, Z, NZ. op_skip()
# selects among them with a 2-bit field, so they must stay at indexes 0-3.
special_registers = ("C", "NC", "Z", "NZ", "PC")
def main():
    """Run the disassembler from the command line.

    Parses the arguments, updates the module-level output options to match
    the selected flags, disassembles the given .hex file and writes the
    listing to the same path with the extension replaced by ``.s``.

    Selecting any of -c, -n, -s or -w switches off show_verbose, so only
    the explicitly requested columns are shown.
    """
    # Function-scope import: os.path is only needed here, to derive the
    # output filename.
    import os.path

    global show_output
    global show_verbose
    global show_linenums
    global show_words
    global show_wordspace
    global show_source

    print("Four-Bit-Badge Disassembler version %s\n" % __version__)

    parser = argparse.ArgumentParser()
    parser.add_argument("hexfile", help=".hex file for disassembly")
    parser.add_argument("-q", help="Write to file without showing any human-readable output", action="store_true")
    parser.add_argument("-c", help="enable sourcecode readout", action="store_true")
    parser.add_argument("-n", help="enable line numbers", action="store_true")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-s", help="Show 12-bit instructions with spaces between words", action="store_true")
    group.add_argument("-w", help="Show 12-bit instructions without spaces between words", action="store_true")
    args = parser.parse_args()

    if args.q:
        show_output = False
    if args.c:
        show_verbose = False
        show_source = True
    if args.n:
        show_verbose = False
        show_linenums = True
    if args.s:
        show_verbose = False
        show_words = True
        show_wordspace = True
    if args.w:
        show_verbose = False
        show_words = True
        show_wordspace = False

    # splitext only looks at the final path component, so a dot in a
    # directory name (e.g. "../prog") no longer truncates the path, and a
    # leading-dot filename keeps its full name.
    outfile = os.path.splitext(args.hexfile)[0] + ".s"

    status = disassemble(read_hex_file(args.hexfile), print_output=show_output, outfile=outfile)
    if status:
        print("\nSuccessfully wrote asm file: %s\n" % outfile)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Mike, I have implemented an experimental version of your assembler using regular expression parsing, kind of as a demo, based on reverse engineering the code above. I posted my implementation at https://github.com/carl3/fbb_as2
It's not as small as I expected (I added lots of comments); a Perl implementation would be a lot smaller, since regular expressions are part of that language, whereas I used compiled regexes in Python. I also had to create a class in Python so I could test a regex and keep the results.
I have some questions on the spec. Can you give me an email address where I can reach you? Mine is carl at carlhage dot com.
You might be able to use my test files to check yours, but I probably need to fix some things after I can ask questions.