Skip to content

Instantly share code, notes, and snippets.

@szczys
Last active January 3, 2021 01:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save szczys/b9a19714ea27d50be01d1a8479f97795 to your computer and use it in GitHub Desktop.
Save szczys/b9a19714ea27d50be01d1a8479f97795 to your computer and use it in GitHub Desktop.
4-Bit Badge Assembler still in development
#!/usr/bin/env python3
import argparse
import string
__version__ = "0.6dev"
#Character that starts a comment; parse_line() keeps everything from it to end of line
comment_delimiter = ";" #Must be a single character or tokenizer will break
#Characters allowed in opcodes, symbols, register names, and numeric literals
accepted_chars = string.ascii_letters+string.digits+"._"
special_delimiters = [',',':','[',']','+','-'] #Characters that make up the ASM language
#Keywords that select one 4-bit nibble of a 12-bit value (applied in SmartToken.resolve)
modifying_keywords = ["LOW","MID","HIGH"]
#Arithmetic operators that may be applied to numeric tokens (applied in SmartToken.resolve)
modifying_operators = ["+","-"]
#Output options
show_output = True
show_verbose = True #Prints all optional things (spaces, comments, empty lines)
show_wordspace = False
show_linenums = False
show_comments = False
#Byte sequence that generate_hex() places in front of the assembled message
header = [0x00, 0xFF, 0x00, 0xFF, 0xA5, 0xC3]
#A register's index in this tuple is its 4-bit machine code encoding (see get_reg_binary)
named_registers = (
"R0",
"R1",
"R2",
"R3",
"R4",
"R5",
"R6",
"R7",
"R8",
"R9",
"OUT",
"IN",
"JSR",
"PCL",
"PCM",
"PCH",
)
special_registers = (
#Important: Don't change the index positions of C,NC,Z,NZ as they're
#being used in generating machine code
"C",
"NC",
"Z",
"NZ",
"PC",
)
#Symbol table (labels and EQU variables); parse_asm() replaces it after the first pass
symbols = dict()
def parse_asm(lines_of_asm,hexfile_out=None):
    '''
    Assemble a string of ASM source into machine code.

    The first pass tokenizes every line and builds the symbol table. The
    second pass resolves symbols, encodes each instruction, and prints the
    result according to the module level show_* flags.

    lines_of_asm: complete source text with lines separated by newlines
    hexfile_out:  optional output path; when given, the assembled words are
                  written there in the byte format built by generate_hex()

    Returns True on success. Returns None (after printing an error) when a
    line fails to tokenize, resolve, or encode.
    '''
    global symbols
    machine_lines = 0
    word_array = []

    #First pass: Tokenize the input
    try:
        code_list, symbols = get_tokenized_code(lines_of_asm)
    except ParserError:
        #Error message will have already printed so bail
        return

    #Second pass: Generate output
    for i, c in enumerate(code_list):
        tokens = c.tokens

        #Blank lines and symbol definitions emit no machine code; echo them in verbose mode
        if tokens is None or tokens[0] in symbols:
            if show_verbose and show_output:
                print_output(c.source, None, None)
            continue

        try:
            #Do a substitution pass for variables in this set of tokens.
            #This lives inside the try because SmartToken.resolve() raises ParserError
            working_tokens = list()
            for t in tokens:
                if type(t) == SmartToken:
                    working_tokens.append(t.resolve(symbols))
                elif t in special_delimiters:
                    continue
                else:
                    working_tokens.append(t.upper())

            opcode = working_tokens[0]
            handler = instructions.get(opcode)
            if handler is None:
                raise ParserError("E::Unknown opcode: %s" % opcode)
            if opcode == "ORG":
                #ORG needs the current address to know how many filler lines to emit
                binary = handler(working_tokens, machine_lines)
            else:
                binary = handler(working_tokens)
        except ParserError as e:
            print_error(e, i, c.source)
            return

        if binary is None:
            print_error(ParserError("E::Uncaught syntax error"), i, c.source)
            return

        #Special directives GOSUB, GOTO, and ORG will return multiple lines
        for bin_line in binary.split('\n'):
            ln = machine_lines if (show_linenums or show_verbose) else None
            cm = c.comment if (show_comments or show_verbose) else None
            if show_output:
                print_output(bin_line,ln,cm)
            machine_lines += 1
            if hexfile_out is not None:
                word_array.append(bin_line.replace(" ",""))

    if hexfile_out is not None:
        with open(hexfile_out, "wb") as f:
            f.write(bytes(generate_hex(word_array)))
    return True
def get_tokenized_code(lines_of_asm):
    '''
    First assembler pass: tokenize every source line and build the symbol table.

    lines_of_asm: complete source text with lines separated by newlines

    Returns (code_array, symbols) where code_array holds one CodePack per
    source line and symbols maps upper-case label/variable names to their
    value (program address for labels, resolved value for EQU variables).

    Raises a bare ParserError after printing the error message, so callers
    only need to stop processing.
    '''
    def fail(message, line_index, source):
        #Report the problem here; the caller treats ParserError as already printed
        print_error(message, line_index, source)
        raise ParserError()

    code_array = []
    symbols = dict()
    reg_addr = 0 #Program memory address of the next instruction

    for i, raw_line in enumerate(lines_of_asm.split('\n')):
        try:
            code_obj = parse_line(raw_line)
        except ParserError as e:
            fail(e, i, raw_line)
        code_array.append(code_obj)

        #Validate the opcodes/labels during first pass
        if code_obj.tokens is None:
            continue
        tokens = code_obj.tokens
        token = tokens[0].upper()

        if token not in instructions:
            #This must be a symbol or a variable definition
            if token in symbols:
                fail("E::Cannot define a token that was previously defined", i, code_obj.source)
            #Default to the current address for use if this turns out to be a label
            s_value = reg_addr
            if len(tokens) > 1:
                #parse_line() already upper-cased string tokens; operands are SmartToken lists
                if tokens[1] == "EQU":
                    if len(tokens) != 3:
                        fail("E::Wrong number of items in EQU statement", i, code_obj.source)
                    try:
                        #This is a variable definition, use its value for the symbol table
                        s_value = tokens[2].resolve(symbols)
                    except ParserError as e:
                        fail(e, i, code_obj.source)
                elif tokens[1] != ":":
                    #Operands after an unknown word mean a mistyped opcode, not a label
                    fail("E::Unknown opcode: %s" % token, i, code_obj.source)
            #Write correct value to symbols table
            symbols[token] = s_value
        elif token == "GOSUB" or token == "GOTO":
            #These directives will add two lines of code instead of one so adjust here
            reg_addr += 2
        elif token == "ORG":
            if len(tokens) < 2 or not isinstance(tokens[1], SmartToken):
                fail("E::This opcode requires a number [0..4095] as argument", i, code_obj.source)
            try:
                new_linenum = get_dec_or_token(tokens[1].resolve([]))
            except ParserError as e:
                fail(e, i, code_obj.source)
            if type(new_linenum) != int or not 0 <= new_linenum < 4096:
                fail("E::This opcode requires a number [0..4095] as argument but got %s" % (new_linenum,), i, code_obj.source)
            elif new_linenum <= reg_addr:
                fail("E::The ORG opcode requires the argument (%d) be greater than the current program memory register number (%d)" % (new_linenum, reg_addr), i, code_obj.source)
            reg_addr = new_linenum
        else:
            #All other instructions increment the address by one
            reg_addr += 1
    return code_array,symbols
def tokenize(instring, delimiters=special_delimiters+[comment_delimiter]):
    '''
    Split a line of ASM code into tokens.

    The line is first split on whitespace, then every piece is split again
    on each delimiter in turn. Delimiters are kept as their own tokens;
    whitespace and empty fragments are dropped.
    '''
    pieces = instring.split()
    for delim in delimiters:
        expanded = []
        for piece in pieces:
            fragments = piece.split(delim)
            #Every fragment except the last one was followed by the delimiter
            final_fragment = fragments.pop()
            for fragment in fragments:
                if fragment:
                    expanded.append(fragment)
                expanded.append(delim)
            #Fragments are empty when the delimiter begins or ends the piece
            if final_fragment:
                expanded.append(final_fragment)
        pieces = expanded
    return pieces
def parse_line(instring):
    '''
    Tokenize a single line of code and check that its syntax is valid
    (this does not mean the combo of instruction and operands is valid).

    Operands are collected into SmartToken streams so symbol substitution
    and math can be done in the second pass.

    Returns a CodePack() object: .source is the original line, .comment is
    the text from the comment delimiter onward (None without a comment) and
    .tokens is the list of parsed tokens (None for blank/comment-only lines).

    Raises ParserError on any syntax error.
    '''
    raw_tokens = tokenize(instring)
    parsed_tokens = list()
    code = CodePack(source=instring)
    s = TokenizerStates()

    #Preserve full comment
    if comment_delimiter in instring:
        code.comment = instring[instring.index(comment_delimiter):]

    #Custom type to contain Token streams. Will be used for symbol substitution and math in second pass
    token_stream = SmartToken()

    operand_states = [s.WATCH_TOKEN_SET,
                      s.TOKEN_COLON_BRACKET,
                      s.TOKEN_BRACKET,
                      s.TOKEN_COMMA_COMMENT,
                      s.TOKEN_COMMENT,
                      s.TOKEN_VAR_DEF]

    #Walk through the flowchart
    for e in raw_tokens:
        e = e.upper()
        if s.cur_state==s.OPCODE:
            if all(c in accepted_chars for c in e):
                parsed_tokens.append(e)
                s.cur_state = s.WATCH_TOKEN_SET
                continue
            elif e == comment_delimiter:
                #Comment already preserved, nothing left to parse
                break
            else:
                raise ParserError("E::Syntax error: Invalid characters found in: %s" % e)

        elif s.cur_state==s.WATCH_COMMENT:
            #The only trigger character valid from here on out is comment_delimiter
            if e != comment_delimiter:
                raise ParserError("E::Expected comment (%s) or end of line but got %s" % (comment_delimiter, e))
            #Comment already preserved, nothing left to do
            break

        elif s.cur_state==s.WATCH_COMMA_COMMENT:
            if e==",":
                #Should be another token or set coming
                s.cur_state = s.WATCH_TOKEN_SET
                continue
            elif e==comment_delimiter:
                #Comment already preserved, nothing left to do
                break
            else:
                raise ParserError("E::Expected comma (,) or comment (%s) but got %s" % (comment_delimiter, e))

        elif s.cur_state in operand_states:
            #Need to know what came before. This is tricky because sometimes we will already
            #be filling a token stream and other times we'll just be starting one
            if len(token_stream) != 0:
                previous = token_stream[-1]
            else:
                previous = parsed_tokens[-1]

            if type(previous) == int:
                prev_isvalid = True
            else:
                #Raised error tells us this is not a valid token but has special characters in it
                try:
                    validate_token(previous)
                    prev_isvalid = True
                except (ParserError, TypeError):
                    prev_isvalid = False

            if e=="EQU":
                if s.cur_state == s.WATCH_TOKEN_SET and len(parsed_tokens)==1 and len(token_stream)==0:
                    #Found EQU in the right place
                    parsed_tokens.append(e)
                    s.cur_state = s.TOKEN_VAR_DEF
                    continue
                raise ParserError("E::Unexpected EQU after %s" % str(previous))

            elif e=='[':
                if s.cur_state == s.WATCH_TOKEN_SET:
                    #Found [ in right place
                    token_stream.append(e)
                    s.cur_state = s.TOKEN_COLON_BRACKET
                    continue
                raise ParserError("E::Unexpected opening bracket ([) after %s" % str(previous))

            elif e==':':
                if s.cur_state == s.TOKEN_COLON_BRACKET and prev_isvalid:
                    #Found : in right place
                    token_stream.append(e)
                    s.cur_state = s.TOKEN_BRACKET
                    continue
                elif len(parsed_tokens)==1 and prev_isvalid:
                    #This is a label assignment; Nothing should come after this but a comment
                    #This colon is not inside of brackets so it isn't part of a token_stream; it's part of parsed_tokens
                    parsed_tokens.append(e)
                    s.cur_state = s.WATCH_COMMENT
                    continue
                raise ParserError("E::Unexpected colon (:) after %s" % str(previous))

            elif e=="]":
                if s.cur_state in [s.TOKEN_BRACKET, s.TOKEN_COLON_BRACKET] and prev_isvalid:
                    #Found ] in right place
                    token_stream.append(e)
                    parsed_tokens.append(token_stream)
                    token_stream = SmartToken()
                    s.cur_state = s.WATCH_COMMA_COMMENT
                    continue
                raise ParserError("E::Unexpected closing bracket (]) after %s" % str(previous))

            elif e==",":
                if s.cur_state == s.TOKEN_COMMA_COMMENT and prev_isvalid:
                    #Found , in right place
                    if len(token_stream) != 0:
                        parsed_tokens.append(token_stream)
                        token_stream = SmartToken()
                    parsed_tokens.append(e)
                    s.cur_state = s.WATCH_TOKEN_SET
                    continue
                raise ParserError("E::Unexpected comma (,) after %s" % str(previous))

            elif e==comment_delimiter:
                if s.cur_state in [s.TOKEN_COMMA_COMMENT, s.TOKEN_COMMENT, s.TOKEN_VAR_DEF] and prev_isvalid:
                    #Found ; in right place, no need to parse more.
                    #The pending token_stream is flushed after the loop
                    break
                raise ParserError("E::Unexpected opening semicolon (;) after %s" % str(previous))

            elif e in modifying_keywords:
                if previous in token_preceders:
                    #Valid HIGH/LOW modifier
                    token_stream.append(e)
                    continue
                raise ParserError("E::Unexpected modifier %s after %s" % (e,str(previous)))

            elif e in modifying_operators:
                if previous in token_preceders:
                    #Hack: add 0 before leading operator (like -/+)
                    token_stream.append(0)
                    token_stream.append(e)
                    continue
                elif prev_isvalid:
                    #Operators can follow valid tokens
                    token_stream.append(e)
                    continue
                raise ParserError("E::Unexpected operator %s after %s" % (e,str(previous)))

            else:
                #Should have taken care of all modifiers and dividers
                #This will raise an error if it is not a valid token being added
                token_stream.append(validate_token(e))
                if s.cur_state == s.WATCH_TOKEN_SET:
                    #We got the token we were watching for so now look for more of this token, or a comma or comment
                    s.cur_state = s.TOKEN_COMMA_COMMENT
                continue

        else:
            raise ParserError("E::Unknown parser state machine cur_state value: %s" % s.cur_state)

    #Catch any token_streams that weren't written
    if len(token_stream) != 0:
        parsed_tokens.append(token_stream)

    #An EQU statement is exactly: name, EQU, value (checked with or without a trailing comment)
    if s.cur_state == s.TOKEN_VAR_DEF and len(parsed_tokens) != 3:
        raise ParserError("E::Wrong number of items in EQU statement")

    if len(parsed_tokens) != 0:
        code.tokens = parsed_tokens
    return code
def validate_token(e):
    '''
    Validates the token part of the token stream (letters, numbers, underscore, period).

    Returns the token unchanged when every character is in accepted_chars.
    Raises ParserError if special characters like +,-,[,] are found.
    '''
    if all(c in accepted_chars for c in e):
        return e
    #Format the token into the message (it used to be passed as a second exception argument)
    raise ParserError("E::Illegal characters in token: %s" % (e,))
def format_unexpected_char_error(element,previous):
    '''Build the syntax error message for an element that may not follow the previous token.'''
    template = "E::Syntax error: Unexpected %s after %s"
    return template % (element, str(previous))
class TokenizerStates:
    '''
    State constants and current state for the parse_line() state machine.

    Each state name describes what the parser will accept next.
    '''
    def __init__(self):
        #State identifiers are consecutive integers starting at zero
        (self.OPCODE,
         self.TOKEN_COMMA_COMMENT,
         self.TOKEN_COMMENT,
         self.TOKEN_COLON_BRACKET,
         self.TOKEN_BRACKET,
         self.BRACKETS,
         self.WATCH_TOKEN_SET,
         self.WATCH_COMMA_COMMENT,
         self.WATCH_COMMENT,
         self.TOKEN_VAR_DEF) = range(10)
        #Every line starts out expecting an opcode (or label/variable name)
        self.cur_state = self.OPCODE
        self.reset_buffers()

    def reset_buffers(self):
        '''Empty the buffer of tokens collected inside brackets.'''
        self.bracket_token_buffer = []
class SmartToken(list):
    '''
    List of raw operand tokens (strings, plus the int 0 inserted by
    parse_line() before a leading operator) that can be resolved into a
    final value once the symbol table is known.
    '''
    def __init__(self, data=None):
        initial = [] if data is None else list(data)
        #Populate the list itself; previously the data never reached the list
        super().__init__(initial)
        #Kept for backward compatibility with code that reads _stream
        self._stream = list(initial)

    def resolve(self, symbols):
        '''
        Substitute symbols and apply modifiers/operators to this token stream.

        symbols: container of known symbol names (a dict mapping name to
                 value; an empty list may be passed to disable substitution)

        Returns a list of values for a bracketed set like [A:B], otherwise a
        single value. Values are ints for numbers and resolved symbols, or
        upper-case strings for register names and unknown symbols.

        Raises ParserError for syntax errors caused by the source code and
        Exception for states that the parser should never produce.
        '''
        is_set = False
        set_closed = False
        found_named_reg = False
        prefix = None
        working_token = None
        resolved_set = None

        for i, e in enumerate(self):
            if isinstance(e, str):
                e = e.upper()

            if e in named_registers+special_registers:
                found_named_reg = True

            if e=='[':
                if i==0:
                    is_set = True
                    resolved_set = list()
                    continue
                raise Exception("Unexpected opening bracket when parsing smart token. This should never happen")
            elif e==":":
                resolved_set.append(working_token)
                found_named_reg = False
                prefix = None
                working_token = None
                continue
            elif e=="]":
                if i==len(self)-1:
                    resolved_set.append(working_token)
                    set_closed = True
                    found_named_reg = False
                    prefix = None
                    working_token = None
                    continue
                raise Exception("Unexpected closing bracket when parsing smart token. This should never happen")
            elif e in modifying_keywords+modifying_operators:
                prefix = e
                continue

            #Everything that's not a symbol or a token has been filtered out by now
            if e in symbols:
                filtered_t = symbols[e]
            else:
                filtered_t = get_dec_or_token(e)

            if prefix is not None:
                if found_named_reg:
                    #If a named register is already in the working_token this will already be set to True
                    raise ParserError("E::Syntax error: Modifications like (%s) may only be performed on numbers but a named register was found." % prefix)
                if type(filtered_t) != int:
                    raise ParserError("E::Syntax error: Modifications like (%s) may only be performed on numbers but %s was found." % (prefix, filtered_t))
                if prefix=="LOW":
                    #Bits 0..3 of the 12-bit value
                    working_token = filtered_t & 0xF
                elif prefix=="MID":
                    #Bits 4..7 of the 12-bit value
                    working_token = (filtered_t >> 4) & 0xF
                elif prefix=="HIGH":
                    #Bits 8..11 of the 12-bit value
                    working_token = (filtered_t >> 8) & 0xF
                else:
                    #Operators need a numeric left-hand side (e.g. not an undefined symbol)
                    if type(working_token) != int:
                        raise ParserError("E::Syntax error: Modifications like (%s) may only be performed on numbers but %s was found." % (prefix, working_token))
                    if prefix=="+":
                        working_token += filtered_t
                    else:
                        working_token -= filtered_t
                #The modifier has been consumed; don't let it leak onto a later token
                prefix = None
            elif working_token is None:
                working_token = filtered_t
            else:
                #Reachable from source like "MOV R0 R1" where the comma is missing
                raise ParserError("E::Syntax error: Multiple tokens without modifiers, found %s after %s" % (filtered_t, working_token))

        if is_set:
            if not set_closed:
                #Without this the value after the last colon would be silently dropped
                raise ParserError("E::Syntax error: Missing closing bracket (])")
            return resolved_set
        elif working_token is not None:
            return working_token
        raise Exception("Error, SmartToken.resolve() was unable to finish and didn't raise ParserError(). This should never happen.")
def get_dec_or_token(token):
    '''
    Convert a token to a number when possible.

    token: a string, or an int which is returned untouched

    Returns an int if the string was a number (decimal, hex with 0x prefix,
    or binary with 0b prefix), otherwise returns the token unchanged.
    '''
    if isinstance(token, int):
        return token

    base = 10
    if len(token) > 2:
        prefix = token[:2].lower()
        if prefix == "0x":
            base = 16
        elif prefix == "0b":
            base = 2

    try:
        #Try to return it as a number
        return int(token, base)
    except (ValueError, TypeError):
        #Otherwise it must be a token (ValueError) or a non-string such as a list (TypeError)
        return token
def checksum(hexarray):
    '''
    Returns 16-bit checksum

    Param: hexarray is an array of byte values. There must be an even number,
           each pair arranged with low byte first, high second

    Return: checksum as two byte values, low byte first, high second
    '''
    total = 0
    byte_iter = iter(hexarray)
    #Pulling twice from the same iterator walks the array as (low, high) pairs
    for low, high in zip(byte_iter, byte_iter):
        total += low + (high*256)
    #NOTE(review): the sum is reduced modulo 0xFFFF (65535), not 0x10000 - confirm against the receiver
    total %= 0xFFFF
    byte_h, byte_l = divmod(total, 256)
    return [byte_l, byte_h]
def generate_hex(program_list, h=header):
    '''
    Build the list of byte values for the output file.

    program_list: list of 12-bit binary strings, one per machine code word
    h:            list of header bytes placed at the front

    Returns header + word count + program words + checksum, where each
    12-bit value is stored as two bytes (low byte first) and the checksum
    covers the word count and the program words.
    '''
    payload = twelve_bit_to_hex_bytes(format(len(program_list), "012b"))
    for word in program_list:
        payload.extend(twelve_bit_to_hex_bytes(word))
    #The checksum is computed before it is appended to the payload
    payload.extend(checksum(payload))
    return h + payload
def twelve_bit_to_hex_bytes(twelvebit):
    '''
    Takes a string that is a 12-bit binary number.
    Returns two byte values as a list, low byte first.
    '''
    low_byte = int(twelvebit[-8:], 2)
    high_nibble = int(twelvebit[:4], 2)
    return [low_byte, high_nibble]
def args_rxry(tokens,opcode):
    '''
    Encode an instruction that takes two registers (ADD ADC SUB SBB OR AND XOR MOV).

    Returns the formatted word: opcode, RX, RY.
    Raises ParserError if the argument count is wrong or either argument is not a named register.
    '''
    arg_count_test(len(tokens),3)
    reg_x, reg_y = tokens[1], tokens[2]
    if not (reg_x in named_registers and reg_y in named_registers):
        raise ParserError("E::This opcode requires register names as arguments")
    return format_output(opcode, get_reg_binary(reg_x), get_reg_binary(reg_y))
def args_r0n(tokens,opcode):
    '''
    Encode an instruction that takes R0 and a 4-bit literal (CP ADD OR AND XOR RET).

    Returns the formatted word: 0000, opcode, N.
    Raises ParserError if the arguments are not R0 followed by a number.
    '''
    arg_count_test(len(tokens),3)
    register, literal = tokens[1], tokens[2]
    if register != "R0":
        raise ParserError("E::This opcode requires R0 as the first argument")
    if not is_int(literal):
        raise ParserError("E::This opcode requires a number as the second argument")
    return format_output("0000", opcode, get_four_bit_binary(literal))
def args_ry(tokens,opcode):
    '''
    Encode an instruction that takes a single register (INC DEC DSZ RRC).

    Returns the formatted word: 0000, opcode, RY.
    Raises ParserError if the argument is not a named register.
    '''
    arg_count_test(len(tokens),2)
    register = tokens[1]
    if register in named_registers:
        return format_output("0000", opcode, get_reg_binary(register))
    raise ParserError("E::This opcode requires a register name as the argument")
def args_rgm(tokens,opcode):
    '''
    Encode a bit instruction that takes R0..R3 and a bit number 0..3 (BIT BSET BCLR BTG).

    Returns the formatted word: 0000, opcode, then two register bits followed by two bit-number bits.
    Raises ParserError if either argument is out of range.
    '''
    arg_count_test(len(tokens),3)
    register, bit_number = tokens[1], tokens[2]
    if register not in ["R0","R1","R2","R3"]:
        raise ParserError("E::This opcode requires R0, R1, R2, or R3 as the first argument")
    if not (is_int(bit_number) and 0 <= bit_number < 4):
        raise ParserError("E::This opcode requires a number [0..3] as the second argument")
    #Both values fit in two bits so drop the upper half of each 4-bit string
    register_bits = get_reg_binary(register)[2:]
    number_bits = get_four_bit_binary(bit_number)[2:]
    return format_output("0000", opcode, register_bits+number_bits)
def args_go(tokens,trigger_reg):
    '''
    Encode the two-word GOTO/GOSUB sequence for a 12-bit program address.

    tokens:      [directive, address] where address must have resolved to an int
    trigger_reg: 4-bit binary string of the register that receives the low nibble

    Returns two formatted words separated by a newline: the first is encoded
    with prefix 1110 and the high/middle nibbles, the second with prefix 1001,
    trigger_reg and the low nibble.

    Raises ParserError if the address is not a number (for example an
    undefined label) or is outside 0..4095.
    '''
    arg_count_test(len(tokens),2)
    reg_value = tokens[1]
    if not is_int(reg_value):
        #An unresolved label arrives here as a string; comparing it to ints would raise TypeError
        raise ParserError("E::This opcode requires a number or a defined label as the argument but got %s" % (reg_value,))
    if not 0 <= reg_value < 4096:
        raise ParserError("E::Register value is out of range (0 <= reg_value < 4096): %d" % reg_value)
    word_h, word_m, word_l = pc_addr_to_bin(reg_value)
    return format_output("1110", word_h, word_m) + "\n" + format_output("1001",trigger_reg,word_l)
def opcode_add(tokens):
    '''
    Encode ADD in either of its two forms: ADD R0,N or ADD RX,RY.

    Raises ParserError for a wrong argument count or invalid arguments.
    '''
    #Check the count first so a missing argument is reported instead of raising IndexError
    arg_count_test(len(tokens),3)
    #ADD R0,N
    if is_int(tokens[2]):
        return args_r0n(tokens,"0001")
    #ADD RX,RY
    return args_rxry(tokens,"0001")
def opcode_adc(tokens):
    '''Encode ADC RX,RY.'''
    return args_rxry(tokens, opcode="0010")


def opcode_sub(tokens):
    '''Encode SUB RX,RY.'''
    return args_rxry(tokens, opcode="0011")


def opcode_sbb(tokens):
    '''Encode SBB RX,RY.'''
    return args_rxry(tokens, opcode="0100")
def opcode_or(tokens):
    '''
    Encode OR in either of its two forms: OR R0,N or OR RX,RY.

    Raises ParserError for a wrong argument count or invalid arguments.
    '''
    #Check the count first so a missing argument is reported instead of raising IndexError
    arg_count_test(len(tokens),3)
    #OR R0,N
    if is_int(tokens[2]):
        return args_r0n(tokens,"0101")
    #OR RX,RY
    return args_rxry(tokens,"0101")
def opcode_and(tokens):
    '''
    Encode AND in either of its two forms: AND R0,N or AND RX,RY.

    Raises ParserError for a wrong argument count or invalid arguments.
    '''
    #Check the count first so a missing argument is reported instead of raising IndexError
    arg_count_test(len(tokens),3)
    #AND R0,N
    if is_int(tokens[2]):
        return args_r0n(tokens,"0110")
    #AND RX,RY
    return args_rxry(tokens,"0110")
def opcode_xor(tokens):
    '''
    Encode XOR in either of its two forms: XOR R0,N or XOR RX,RY.

    Raises ParserError for a wrong argument count or invalid arguments.
    '''
    #Check the count first so a missing argument is reported instead of raising IndexError
    arg_count_test(len(tokens),3)
    #XOR R0,N
    if is_int(tokens[2]):
        return args_r0n(tokens,"0111")
    #XOR RX,RY
    return args_rxry(tokens,"0111")
def opcode_mov(tokens):
    '''
    Encode every form of MOV.

    tokens: [MOV, destination, source] where a bracketed operand has been
            resolved to a list of values

    Supported forms: MOV PC,[NN]  MOV R0,[NN]  MOV R0,[X:Y]  MOV [NN],R0
                     MOV [X:Y],R0  MOV RX,RY  MOV RX,N

    Returns the formatted 12-bit word.
    Raises ParserError for any operand combination that is not supported
    (these used to surface as a bare Exception with no message).
    '''
    arg_count_test(len(tokens),3)
    dest, src = tokens[1], tokens[2]

    if any(isinstance(i,list) for i in tokens):
        #Must be instruction containing a set of brackets
        if dest == "PC":
            #MOV PC,NN
            if all(is_int(i) for i in src):
                word_high, word_low = get_eight_bit_binary(src)
                return format_output("1110", word_high, word_low)
            raise ParserError("E::Expected literal number for MOV PC, NN")
        elif dest == "R0":
            if all(is_int(i) for i in src):
                #MOV R0,[NN]
                word_high, word_low = get_eight_bit_binary(src)
                return format_output("1101", word_high, word_low)
            elif len(src) == 2 and all(i in named_registers for i in src):
                #MOV R0,[XY]
                return format_output("1011", get_reg_binary(src[0]), get_reg_binary(src[1]))
            raise ParserError("E::Type mismatch for values inside brackets")
        elif src == "R0":
            if all(is_int(i) for i in dest):
                #MOV [NN],R0
                word_high, word_low = get_eight_bit_binary(dest)
                return format_output("1100", word_high, word_low)
            elif len(dest) == 2 and all(i in named_registers for i in dest):
                #MOV [XY],R0
                return format_output("1010", get_reg_binary(dest[0]),get_reg_binary(dest[1]))
            raise ParserError("E::Type mismatch for values inside brackets")
        raise ParserError("E::A MOV with brackets requires PC or R0 as the other argument")

    #Catch edge case syntax error
    if dest == "PC":
        raise ParserError("E::Syntax error, numeric literal must be in brackets for PC,[NN]")
    #MOV RX,RY
    if dest in named_registers and src in named_registers:
        return args_rxry(tokens,"1000")
    #MOV RX,N
    if is_int(src):
        if dest in named_registers:
            return format_output("1001", get_reg_binary(dest), get_four_bit_binary(src))
        raise ParserError("E::Expected register name for first argument of MOV RX,N")
    raise ParserError("E::Unsupported arguments for MOV: %s,%s" % (dest, src))
def opcode_jr(tokens):
    '''
    Encode JR with a bracketed offset, either [N] (signed, -128..127) or
    [high:low] (two 4-bit values).

    Returns the formatted word: 1111, high nibble, low nibble.
    Raises ParserError if the argument is not a bracketed set of numbers.
    '''
    arg_count_test(len(tokens),2)
    offset = tokens[1]
    #A bare number is an int here and cannot be iterated, so require a list first
    if isinstance(offset, list) and all(is_int(i) for i in offset):
        word_high, word_low = get_eight_bit_binary(offset, signed=True)
        return format_output("1111", word_high, word_low)
    raise ParserError("E::This opcode requires two numbers as arguments")
def opcode_cp(tokens):
    '''Encode CP R0,N.'''
    return args_r0n(tokens, opcode="0000")


def opcode_inc(tokens):
    '''Encode INC RY.'''
    return args_ry(tokens, opcode="0010")


def opcode_dec(tokens):
    '''Encode DEC RY.'''
    return args_ry(tokens, opcode="0011")


def opcode_dsz(tokens):
    '''Encode DSZ RY.'''
    return args_ry(tokens, opcode="0100")
def opcode_exr(tokens):
    '''
    Encode EXR N.

    Returns the formatted word: 0000, 1000, N.
    Raises ParserError if the argument is not a number.
    '''
    arg_count_test(len(tokens),2)
    literal = tokens[1]
    if not is_int(literal):
        raise ParserError("E::This opcode requires a number as the argument")
    return format_output("0000", "1000", get_four_bit_binary(literal))
def opcode_bit(tokens):
    '''Encode BIT RG,M.'''
    return args_rgm(tokens, opcode="1001")


def opcode_bset(tokens):
    '''Encode BSET RG,M.'''
    return args_rgm(tokens, opcode="1010")


def opcode_bclr(tokens):
    '''Encode BCLR RG,M.'''
    return args_rgm(tokens, opcode="1011")


def opcode_btg(tokens):
    '''Encode BTG RG,M.'''
    return args_rgm(tokens, opcode="1100")


def opcode_rrc(tokens):
    '''Encode RRC RY.'''
    return args_ry(tokens, opcode="1101")


def opcode_ret(tokens):
    '''Encode RET R0,N.'''
    return args_r0n(tokens, opcode="1110")
def opcode_skip(tokens):
    '''
    Encode SKIP F,M where F is one of the first four special registers
    (C, NC, Z, NZ) and M is a number 0..3.

    Returns the formatted word: 0000, 1111, then two flag bits followed by two bits of M.
    Raises ParserError if either argument is invalid.
    '''
    arg_count_test(len(tokens),3)
    flag, count = tokens[1], tokens[2]
    valid_flags = special_registers[:4]
    if flag not in valid_flags:
        raise ParserError("E::This opcode requires %s, %s, %s, or %s as the first argument" % valid_flags)
    if not (is_int(count) and 0 <= count < 4):
        raise ParserError("E::This opcode requires a number [0..3] as the second argument")
    #The flag is encoded as its index position in special_registers
    flag_bits = format(special_registers.index(flag),"02b")
    return format_output("0000", "1111", flag_bits + get_four_bit_binary(count)[2:])
def opcode_goto(tokens):
    '''Encode the GOTO directive; the low address nibble is written to PCL.'''
    pcl_bits = get_reg_binary("PCL")
    return args_go(tokens, pcl_bits)


def opcode_gosub(tokens):
    '''Encode the GOSUB directive; the low address nibble is written to JSR.'''
    jsr_bits = get_reg_binary("JSR")
    return args_go(tokens, jsr_bits)
def opcode_org(tokens,linenum):
    '''
    Encode the ORG directive by padding program memory with zero words.

    tokens:  [ORG, target address]
    linenum: current program memory address

    Returns one all-zero word per address between linenum and the target,
    separated by newlines (an empty string if there is nothing to fill).
    '''
    arg_count_test(len(tokens),2)
    #This should have been validated as a number within range and greater than
    #current program memory register when the symbols table was calculated
    #in get_tokenized_code()
    lines_to_fill = tokens[1] - linenum
    #Use format_output() so filler words follow the same word spacing option as real instructions
    filler = format_output("0000", "0000", "0000")
    return "\n".join([filler] * lines_to_fill)
def is_int(val):
    '''Return True only when val is exactly of type int (subclasses such as bool do not count).'''
    return type(val) is int
def get_four_bit_binary(value):
    '''
    Convert int to a 4-bit binary string.
    Raises ParserError if the value is outside 0..15.
    '''
    if 0 <= value < 16:
        return format(value, "04b")
    raise ParserError("E::Literal value out of range. Expected [0..15]")
def get_eight_bit_binary(brackets, signed=False):
    '''
    Convert the numbers from a bracketed set into two 4-bit binary strings.

    brackets: list holding either one 8-bit number or two 4-bit numbers
              (high nibble first)
    signed:   when True a single number is accepted in the range -128..127
              and encoded as two's complement; otherwise 0..255

    Returns (high nibble, low nibble) as 4-bit binary strings.
    Raises ParserError if a value is out of range or the set does not hold
    one or two numbers.
    '''
    if len(brackets) == 1:
        value = brackets[0]
        if signed:
            #Upper bound is inclusive so that 127 is accepted as the message promises
            if not -128 <= value <= 127:
                raise ParserError("E::Literal value out of range. Expected [-128..127]")
        elif not 0 <= value < 256:
            raise ParserError("E::Literal value out of range. Expected [0..255]")
        #Masking gives the two's complement form of negative numbers
        bin_byte = format(value & 0xFF, "08b")
        return bin_byte[:4], bin_byte[4:]

    if len(brackets) != 2:
        raise ParserError("E::Expected one or two numbers inside brackets but got %d" % len(brackets))
    if any(not 0 <= i < 16 for i in brackets):
        raise ParserError("E::Literal value out of range. Expected [0..15]")
    return format(brackets[0], "04b"), format(brackets[1], "04b")
def get_reg_binary(reg_name):
    '''Return the 4-bit binary string of a named register (its index in named_registers).'''
    reg_index = named_registers.index(reg_name)
    return format(reg_index, "04b")
def arg_count_test(actual, expected):
    '''
    Raise ParserError unless the token count matches.
    Token count includes opcode so the reported argument counts are one less.
    '''
    if actual == expected:
        return
    raise ParserError("E::Expected %d arguments for this opcode but got %d" % (expected-1, actual-1))
def format_output(*arg):
    '''
    Join the given words into one output string.
    Words are separated by a space when show_wordspace or show_verbose is set,
    otherwise they are run together. Argument list should all be strings.
    '''
    separator = " " if (show_wordspace or show_verbose) else ""
    return separator.join(str(word) for word in arg)
def pc_addr_to_bin(pc_num):
    '''Split a number into the (high, middle, low) 4-bit strings of its 12-bit binary form.'''
    bits = format(pc_num, "012b")
    high, middle, low = bits[:4], bits[4:8], bits[8:]
    return (high, middle, low)
def print_output(binary,ln,cm):
    '''
    Print one line of output.

    binary: the text to print (machine code or an echoed source line)
    ln:     line number printed first and followed by a tab, or None to omit
    cm:     comment appended after a tab, or None to omit
    '''
    parts = []
    if ln is not None:
        parts.append(format(ln, " 4") + '\t')
    parts.append(binary)
    if cm is not None:
        parts.append('\t' + cm)
    print("".join(parts))
def print_error(e, line_num, line):
    '''Print an error (exception or message) followed by the line index and source line it refers to.'''
    report = "%s\n\tLine %d:\t%s" % (e, line_num, line)
    print(report)
def read_asm_file(filename):
    '''Return the entire contents of the text file as one string.'''
    with open(filename, 'r') as source:
        return source.read()
class CodePack:
    '''Container for one parsed line: its tokens, its comment, and the original source text.'''
    def __init__(self, tokens=None, comment=None, source=None):
        self.tokens, self.comment, self.source = tokens, comment, source
class ParserError(Exception):
    '''Raised when source code cannot be tokenized, resolved, or encoded.'''
#Dispatch table mapping each opcode/directive name to the function that encodes it.
#Handlers take the resolved token list; the ORG handler also takes the current
#machine line number (see parse_asm).
#NOTE(review): the trailing numbers presumably refer to entries in the
#instruction set documentation - TODO confirm
instructions = {
"ADD": opcode_add, # 17
"ADC": opcode_adc,
"SUB": opcode_sub,
"SBB": opcode_sbb,
"OR": opcode_or, #21
"AND": opcode_and, #22
"XOR": opcode_xor, #23
"MOV": opcode_mov, #9,10,11,12,13,14
"JR": opcode_jr,
"CP": opcode_cp,
"INC": opcode_inc,
"DEC": opcode_dec,
"DSZ": opcode_dsz,
"EXR": opcode_exr,
"BIT": opcode_bit,
"BSET": opcode_bset,
"BCLR": opcode_bclr,
"BTG": opcode_btg,
"RRC": opcode_rrc,
"RET": opcode_ret,
"SKIP": opcode_skip,
"GOTO": opcode_goto,
"GOSUB": opcode_gosub,
"ORG": opcode_org,
}
#Tokens after which parse_line() accepts a LOW/MID/HIGH modifier or a leading +/- operator
token_preceders = [*instructions]+["EQU","[",":",","]
def main():
    '''
    Command line entry point: parse the options, assemble the given file,
    and write the result next to it with a .hex extension.
    '''
    #Local import keeps this fix self-contained
    import os

    global show_output
    global show_verbose
    global show_wordspace
    global show_linenums
    global show_comments

    print("Four-Bit-Badge Assembler version %s\n" % __version__)
    parser = argparse.ArgumentParser()
    parser.add_argument("asmfile", help="assembly language file to be processed")
    parser.add_argument("-q", help="Write to file without showing any human-readable output", action="store_true")
    parser.add_argument("-c", help="enable comments in readout", action="store_true")
    parser.add_argument("-n", help="enable line numbers in readout", action="store_true")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-s", help="Show 12-bit instructions with spaces between words", action="store_true")
    group.add_argument("-w", help="Show 12-bit instructions without spaces between words", action="store_true")
    args = parser.parse_args()

    #Choosing any specific output option turns off the verbose default
    if args.q:
        show_output = False
    if args.c:
        show_verbose = False
        show_comments = True
    if args.n:
        show_verbose = False
        show_linenums = True
    if args.s:
        show_verbose = False
        show_wordspace = True
    if args.w:
        show_verbose = False
        show_wordspace = False

    #splitext only looks at the final path component, so a dot in a directory
    #name (e.g. ../prog) is no longer mistaken for the file extension
    outfile = os.path.splitext(args.asmfile)[0] + ".hex"

    if parse_asm(read_asm_file(args.asmfile),hexfile_out=outfile) == True:
        print("\nSuccessfully wrote hex file: %s\n" % outfile)


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
import argparse
from fbb_as import checksum
__version__ = "0.6dev"
#Byte sequence that every valid hex file must begin with (checked by is_valid)
header = [0x00, 0xFF, 0x00, 0xFF, 0xA5, 0xC3]
#Output options
show_output = True
show_verbose = True #Prints all optional things (spaces, comments, empty lines)
show_words = False #Include the 12-bit machine code words in each output line
show_wordspace = True #Separate the three 4-bit words with spaces
show_linenums = False #Prefix each output line with its line number
show_source = False #Include the disassembled source text in each output line
def is_valid(hexarray, h=header):
    '''
    Return True if the byte list has the header and a correct checksum.

    hexarray: list of byte values. There must be an even number of elements,
              each pair arranged with low byte first, high second. It must
              begin with the header values and end with the 16-bit checksum
              as a low-byte, high-byte pair.
    h:        sequence of header bytes to expect

    Returns True when the checksum matches, False when it does not.
    Raises Exception if the length is odd or the header is missing.
    '''
    header_len = len(h)
    hexarray_len = len(hexarray)
    if hexarray_len%2 != 0:
        raise Exception("Binary message must be an even number of bytes but %d were found." % hexarray_len)
    #Compare against the actual header length (was hard-coded to 6) and
    #normalize to lists so a tuple header also matches
    if list(hexarray[:header_len]) != list(h):
        raise Exception("Binary message must begin with header: %s but found: %s" % (str(h),str(hexarray)))
    message = hexarray[header_len:-2]
    return checksum(message) == hexarray[-2:]
def read_hex_file(filename):
    '''Read a binary file and return its contents as a list of byte values.'''
    with open(filename, mode='rb') as source:
        return list(source.read())
def write_asm_file(filename, contents):
    '''Write each item of contents to the file on its own line. Returns True.'''
    with open(filename, 'w') as target:
        for entry in contents:
            target.write("%s\n" % entry)
    return True
def disassemble(hexarray, h=header, print_output=True, outfile=None):
    '''
    Turn the bytes of a hex file back into assembly source lines.

    hexarray:     list of byte values: header, a two byte value that is
                  skipped here, the program as low/high byte pairs, and a
                  two byte checksum
    h:            sequence of header bytes the data must begin with
    print_output: print each disassembled line when True
    outfile:      optional path; when given the lines are also written there

    Returns the result of write_asm_file() (True) when outfile is given,
    otherwise None.
    Raises Exception if the data fails validation.
    '''
    output_buffer = []
    #Validate against the same header that is used for slicing below
    if not is_valid(hexarray, h):
        raise Exception("Data has an invalid checksum")

    #Skip the header plus the two bytes after it, and drop the checksum
    message = hexarray[len(h)+2:-2]
    byte_iter = iter(message)
    #enumerate supplies the running line number (it used to be reset to 0 on every pass)
    for line_number, (low, high) in enumerate(zip(byte_iter, byte_iter)):
        byte_l = format(low,"08b")
        byte_h = format(high,"08b")
        word_l = byte_l[4:]
        word_m = byte_l[:4]
        word_h = byte_h[4:]

        if word_h == "0000":
            #Extended instructions are selected by the middle word
            source = excodes[word_m](word_h,word_m,word_l)
        else:
            source = opcodes[word_h](word_h,word_m,word_l)

        this_line = format_output_line(line_number, word_h, word_m, word_l, source)
        if print_output:
            print(this_line)
        if outfile is not None:
            output_buffer.append(this_line)

    if outfile is not None:
        return write_asm_file(outfile,output_buffer)
def format_output_line(ln, word_h, word_m, word_l, source):
    '''
    Build one line of disassembler output from the enabled columns:
    line number, the three machine code words, and the source text.
    Trailing whitespace is removed.
    '''
    columns = []
    if show_linenums or show_verbose:
        columns.append("%s\t" % format(ln," 5d"))
    if show_words or show_verbose:
        word_format = "%s %s %s\t" if (show_wordspace or show_verbose) else "%s%s%s\t"
        columns.append(word_format % (word_h, word_m, word_l))
    if show_source or show_verbose:
        columns.append("%s" % source)
    return "".join(columns).rstrip()
def args_rxry(instruction, oper_x, oper_y):
    '''Format a two register instruction from the 4-bit binary strings of RX and RY.'''
    reg_x = named_registers[int(oper_x,2)]
    reg_y = named_registers[int(oper_y,2)]
    return "%s %s,%s" % (instruction, reg_x, reg_y)
def args_ry(instruction, oper_y):
    '''Format a single register instruction from the 4-bit binary string of RY.'''
    reg_y = named_registers[int(oper_y,2)]
    return "%s %s" % (instruction, reg_y)
def args_r0n(instruction, oper_y):
    '''Format an instruction that takes R0 and a binary literal.'''
    template = "%s R0,0b%s"
    return template % (instruction, oper_y)
def args_rgm(instruction, oper_y):
    '''
    Format a bit instruction: the first two bits of oper_y select the
    register, the last two are printed as a binary literal.
    '''
    reg_bits, literal_bits = oper_y[:2], oper_y[2:]
    return "%s %s,0b%s" % (instruction, named_registers[int(reg_bits,2)], literal_bits)
def op_add_rxry(word_h,word_m,word_l):
    '''Disassemble ADD RX,RY.'''
    return args_rxry("ADD", oper_x=word_m, oper_y=word_l)


def op_adc(word_h,word_m,word_l):
    '''Disassemble ADC RX,RY.'''
    return args_rxry("ADC", oper_x=word_m, oper_y=word_l)


def op_sub(word_h,word_m,word_l):
    '''Disassemble SUB RX,RY.'''
    return args_rxry("SUB", oper_x=word_m, oper_y=word_l)


def op_sbb(word_h,word_m,word_l):
    '''Disassemble SBB RX,RY.'''
    return args_rxry("SBB", oper_x=word_m, oper_y=word_l)


def op_or_rxry(word_h,word_m,word_l):
    '''Disassemble OR RX,RY.'''
    return args_rxry("OR", oper_x=word_m, oper_y=word_l)


def op_and_rxry(word_h,word_m,word_l):
    '''Disassemble AND RX,RY.'''
    return args_rxry("AND", oper_x=word_m, oper_y=word_l)


def op_xor_rxry(word_h,word_m,word_l):
    '''Disassemble XOR RX,RY.'''
    return args_rxry("XOR", oper_x=word_m, oper_y=word_l)


def op_mov_rxry(word_h,word_m,word_l):
    '''Disassemble MOV RX,RY.'''
    return args_rxry("MOV", oper_x=word_m, oper_y=word_l)
def op_mov_rxn(word_h,word_m,word_l):
    '''Disassemble MOV RX,N.'''
    reg_x = named_registers[int(word_m,2)]
    return "MOV %s,0b%s" % (reg_x, word_l)


def op_mov_xyr0(word_h,word_m,word_l):
    '''Disassemble MOV [X:Y],R0.'''
    reg_x = named_registers[int(word_m,2)]
    reg_y = named_registers[int(word_l,2)]
    return "MOV [%s:%s],R0" % (reg_x, reg_y)


def op_mov_r0xy(word_h,word_m,word_l):
    '''Disassemble MOV R0,[X:Y].'''
    reg_x = named_registers[int(word_m,2)]
    reg_y = named_registers[int(word_l,2)]
    return "MOV R0,[%s:%s]" % (reg_x, reg_y)
def op_mov_nnr0(word_h,word_m,word_l):
    '''Disassemble MOV [NN],R0 with the address shown as two binary nibbles.'''
    nibbles = (word_m, word_l)
    return "MOV [0b%s:0b%s],R0" % nibbles


def op_mov_r0nn(word_h,word_m,word_l):
    '''Disassemble MOV R0,[NN] with the address shown as two binary nibbles.'''
    nibbles = (word_m, word_l)
    return "MOV R0,[0b%s:0b%s]" % nibbles


def op_mov_pcnn(word_h,word_m,word_l):
    '''Disassemble MOV PC,[NN] with the value shown as two binary nibbles.'''
    nibbles = (word_m, word_l)
    return "MOV PC,[0b%s:0b%s]" % nibbles


def op_jr(word_h,word_m,word_l):
    '''Disassemble JR [NN] with the offset shown as two binary nibbles.'''
    nibbles = (word_m, word_l)
    return "JR [0b%s:0b%s]" % nibbles
def op_cp(word_h,word_m,word_l):
    '''Disassemble CP R0,N.'''
    return args_r0n("CP", oper_y=word_l)


def op_add_r0n(word_h,word_m,word_l):
    '''Disassemble ADD R0,N.'''
    return args_r0n("ADD", oper_y=word_l)


def op_inc(word_h,word_m,word_l):
    '''Disassemble INC RY.'''
    return args_ry("INC", oper_y=word_l)


def op_dec(word_h,word_m,word_l):
    '''Disassemble DEC RY.'''
    return args_ry("DEC", oper_y=word_l)


def op_dsz(word_h,word_m,word_l):
    '''Disassemble DSZ RY.'''
    return args_ry("DSZ", oper_y=word_l)


def op_or_r0n(word_h,word_m,word_l):
    '''Disassemble OR R0,N.'''
    return args_r0n("OR", oper_y=word_l)


def op_and_r0n(word_h,word_m,word_l):
    '''Disassemble AND R0,N.'''
    return args_r0n("AND", oper_y=word_l)


def op_xor_r0n(word_h,word_m,word_l):
    '''Disassemble XOR R0,N.'''
    return args_r0n("XOR", oper_y=word_l)
def op_exr(word_h,word_m,word_l):
    '''
    Disassemble EXR N.

    The literal is written with a 0b prefix like every other literal this
    disassembler emits. Without it the assembler would read the bits as a
    decimal number (e.g. 0010 as ten) when the output is assembled again.
    '''
    return "EXR 0b%s" % word_l
def op_bit(word_h,word_m,word_l):
    '''Disassemble BIT RG,M.'''
    return args_rgm("BIT", oper_y=word_l)


def op_bset(word_h,word_m,word_l):
    '''Disassemble BSET RG,M.'''
    return args_rgm("BSET", oper_y=word_l)


def op_bclr(word_h,word_m,word_l):
    '''Disassemble BCLR RG,M.'''
    return args_rgm("BCLR", oper_y=word_l)


def op_btg(word_h,word_m,word_l):
    '''Disassemble BTG RG,M.'''
    return args_rgm("BTG", oper_y=word_l)


def op_rrc(word_h,word_m,word_l):
    '''Disassemble RRC RY.'''
    return args_ry("RRC", oper_y=word_l)


def op_ret(word_h,word_m,word_l):
    '''Disassemble RET R0,N.'''
    return args_r0n("RET", oper_y=word_l)
def op_skip(word_h,word_m,word_l):
    '''
    Disassemble SKIP F,M: the first two bits of the low word select the flag
    from special_registers, the last two are printed as a binary literal.
    '''
    flag_bits, literal_bits = word_l[:2], word_l[2:]
    return "SKIP %s,0b%s" % (special_registers[int(flag_bits,2)], literal_bits)
#Handlers for instructions selected by the high 4-bit word.
#A high word of "0000" is not listed here; disassemble() routes it to excodes
opcodes = {
"0001": op_add_rxry,
"0010": op_adc,
"0011": op_sub,
"0100": op_sbb,
"0101": op_or_rxry,
"0110": op_and_rxry,
"0111": op_xor_rxry,
"1000": op_mov_rxry,
"1001": op_mov_rxn,
"1010": op_mov_xyr0,
"1011": op_mov_r0xy,
"1100": op_mov_nnr0,
"1101": op_mov_r0nn,
"1110": op_mov_pcnn,
"1111": op_jr,
}
#Handlers for instructions whose high word is "0000"; selected by the middle 4-bit word
excodes = {
"0000": op_cp,
"0001": op_add_r0n,
"0010": op_inc,
"0011": op_dec,
"0100": op_dsz,
"0101": op_or_r0n,
"0110": op_and_r0n,
"0111": op_xor_r0n,
"1000": op_exr,
"1001": op_bit,
"1010": op_bset,
"1011": op_bclr,
"1100": op_btg,
"1101": op_rrc,
"1110": op_ret,
"1111": op_skip,
}
#Register names indexed by their 4-bit machine code value
named_registers = (
"R0",
"R1",
"R2",
"R3",
"R4",
"R5",
"R6",
"R7",
"R8",
"R9",
"OUT",
"IN",
"JSR",
"PCL",
"PCM",
"PCH",
)
special_registers = (
#Important: Don't change the index positions of C,NC,Z,NZ as they're
#being used in generating machine code
"C",
"NC",
"Z",
"NZ",
"PC",
)
def main():
    '''
    Command line entry point: parse the options, disassemble the given hex
    file, and write the result next to it with a .s extension.
    '''
    #Local import keeps this fix self-contained
    import os

    global show_output
    global show_verbose
    global show_linenums
    global show_words
    global show_wordspace
    global show_source

    print("Four-Bit-Badge Disassembler version %s\n" % __version__)
    parser = argparse.ArgumentParser()
    parser.add_argument("hexfile", help=".hex file for disassembly")
    parser.add_argument("-q", help="Write to file without showing any human-readable output", action="store_true")
    parser.add_argument("-c", help="enable sourcecode readout", action="store_true")
    parser.add_argument("-n", help="enable line numbers", action="store_true")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-s", help="Show 12-bit instructions with spaces between words", action="store_true")
    group.add_argument("-w", help="Show 12-bit instructions without spaces between words", action="store_true")
    args = parser.parse_args()

    #Choosing any specific output option turns off the verbose default
    if args.q:
        show_output = False
    if args.c:
        show_verbose = False
        show_source = True
    if args.n:
        show_verbose = False
        show_linenums = True
    if args.s:
        show_verbose = False
        show_words = True
    if args.w:
        show_verbose = False
        show_words = True
        show_wordspace = False

    #splitext only looks at the final path component, so a dot in a directory
    #name (e.g. ../prog) is no longer mistaken for the file extension
    outfile = os.path.splitext(args.hexfile)[0] + ".s"

    status = disassemble(read_hex_file(args.hexfile), print_output=show_output, outfile=outfile)
    if status == True:
        print("\nSuccessfully wrote asm file: %s\n" % outfile)


if __name__ == "__main__":
    main()
@carl3
Copy link

carl3 commented Jan 3, 2021

Mike, I have implemented an experimental version of your assembler using regular expression parsing, kind of as a demo, based on reverse engineering the code above. I posted my implementation at https://github.com/carl3/fbb_as2

It's not as small as I thought it would be (I added lots of comments). A Perl implementation would be a lot smaller, since regular expressions are part of that language, whereas in Python I used compiled regexes. I also had to create a class in Python so I could test a regex and keep the results.

I have some questions on the spec. Can you give me an email address where I can reach you? Mine is carl at carlhage dot com.

You might be able to use my test files to check yours, but I probably need to fix some things after I can ask questions.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment