WesleyAC/asm.py

## asm.py
#!/usr/bin/env python3
# vim: ts=11

# Copyright 2019 Google LLC
# SPDX-License-Identifier: Apache-2.0
#
# A SUPER JANK Z80 assembler, written to see if writing an assembler that
# parses the opcode table was reasonable.
# (see https://twitter.com/WAptekar/status/1142828240874221568)
#
# It pretty much works, modulo a few things:
# * Non-canonical representations are not supported (for instance, you must
#   write `sub 42` instead of `sub a, 42`).
# * `djnz` is broken. This is fixable, but more effort than I want to put into
#   a quick hack like this (the assembler currently doesn't know about relative
#   offsets).
# * Probably more stuff's broken as well?
#
# Usage: asm.py input.asm output.bin

import sys, re, ast

# http://z80-heaven.wikidot.com/opcode-reference-chart
#
# Tab-separated opcode chart for the unprefixed instructions. The first line
# is the column header; every following line starts with a row label. The
# label and header cells are discarded by parse_table(), which derives the
# opcode byte from position alone: (row index << 4) + column index.
#
# Placeholders understood by parse_inst():
#   x  / (x)   an 8-bit immediate operand
#   xx / (xx)  a 16-bit immediate operand (a bare xx may also be a label)
#
# NOTE(review): the cells xxBITxx, xxIXxx, xx80xx and xxIYxx (CB, DD, ED, FD)
# look like markers for the prefixed instruction groups rather than real
# mnemonics -- confirm against the chart linked above.
tbl = """	0	1	2	3	4	5	6	7	8	9	A	B	C	D	E	F
0	nop	ld bc,xx	ld (bc),a	inc bc	inc b	dec b	ld b,x	rlca	ex af,af'	add hl,bc	ld a,(bc)	dec bc	inc c	dec c	ld c,x	rrca
1	djnz x	ld de,xx	ld (de),a	inc de	inc d	dec d	ld d,x	rla	jr x	add hl,de	ld a,(de)	dec de	inc e	dec e	ld e,x	rra
2	jr nz,x	ld hl,xx	ld (xx),hl	inc hl	inc h	dec h	ld h,x	daa	jr z,x	add hl,hl	ld hl,(xx)	dec hl	inc l	dec l	ld l,x	cpl
3	jr nc,x	ld sp,xx	ld (xx),a	inc sp	inc (hl)	dec (hl)	ld (hl),x	scf	jr c,x	add hl,sp	ld a,(xx)	dec sp	inc a	dec a	ld a,x	ccf
4	ld b,b	ld b,c	ld b,d	ld b,e	ld b,h	ld b,l	ld b,(hl)	ld b,a	ld c,b	ld c,c	ld c,d	ld c,e	ld c,h	ld c,l	ld c,(hl)	ld c,a
5	ld d,b	ld d,c	ld d,d	ld d,e	ld d,h	ld d,l	ld d,(hl)	ld d,a	ld e,b	ld e,c	ld e,d	ld e,e	ld e,h	ld e,l	ld e,(hl)	ld e,a
6	ld h,b	ld h,c	ld h,d	ld h,e	ld h,h	ld h,l	ld h,(hl)	ld h,a	ld l,b	ld l,c	ld l,d	ld l,e	ld l,h	ld l,l	ld l,(hl)	ld l,a
7	ld (hl),b	ld (hl),c	ld (hl),d	ld (hl),e	ld (hl),h	ld (hl),l	halt	ld (hl),a	ld a,b	ld a,c	ld a,d	ld a,e	ld a,h	ld a,l	ld a,(hl)	ld a,a
8	add a,b	add a,c	add a,d	add a,e	add a,h	add a,l	add a,(hl)	add a,a	adc a,b	adc a,c	adc a,d	adc a,e	adc a,h	adc a,l	adc a,(hl)	adc a,a
9	sub b	sub c	sub d	sub e	sub h	sub l	sub (hl)	sub a	sbc a,b	sbc a,c	sbc a,d	sbc a,e	sbc a,h	sbc a,l	sbc a,(hl)	sbc a,a
A	and b	and c	and d	and e	and h	and l	and (hl)	and a	xor b	xor c	xor d	xor e	xor h	xor l	xor (hl)	xor a
B	or b	or c	or d	or e	or h	or l	or (hl)	or a	cp b	cp c	cp d	cp e	cp h	cp l	cp (hl)	cp a
C	ret nz	pop bc	jp nz,xx	jp xx	call nz,xx	push bc	add a,x	rst 00h	ret z	ret	jp z,xx	xxBITxx	call z,xx	call xx	adc a,x	rst 08h
D	ret nc	pop de	jp nc,xx	out (x),a	call nc,xx	push de	sub x	rst 10h	ret c	exx	jp c,xx	in a,(x)	call c,xx	xxIXxx	sbc a,x	rst 18h
E	ret po	pop hl	jp po,xx	ex (sp),hl	call po,xx	push hl	and x	rst 20h	ret pe	jp (hl)	jp pe,xx	ex de,hl	call pe,xx	xx80xx	xor x	rst 28h
F	ret p	pop af	jp p,xx	di	call p,xx	push af	or x	rst 30h	ret m	ld sp,hl	jp m,xx	ei	call m,xx	xxIYxx	cp x	rst 38h"""

def parse_int(s):
    """Parse an assembler integer literal and return its value.

    Recognised forms, checked in this order:

    * a single quoted character such as ``'a'`` -> its code point,
    * hexadecimal with an ``h`` suffix such as ``1fh``,
    * hexadecimal with a ``0x`` prefix such as ``0x1f``,
    * plain decimal such as ``42``.

    Args:
        s: The operand text, already stripped of parentheses and whitespace.

    Returns:
        The integer value, or ``None`` when *s* is not a literal in one of
        the forms above (callers use this to fall back to label handling).

    Note:
        The ``h`` suffix test comes before the decimal one, so any name made
        only of hex digits followed by ``h`` (for example ``each``) is read
        as a number, not as a label.
    """
    if not s:
        # An empty operand (e.g. the inside of "()") is not a number, and
        # indexing it below would raise IndexError.
        return None
    try:
        if s[0] == "'" and s[-1] == "'":
            return ord(ast.literal_eval(s))
        if s[-1] == "h":
            return int(s[:-1], base=16)
        if s[:2] == "0x":
            return int(s, base=16)
        return int(s)
    except (ValueError, TypeError, SyntaxError):
        # ValueError: malformed digits. TypeError: ord() of a quoted string
        # that is not exactly one character. SyntaxError: an unterminated or
        # otherwise invalid quoted literal given to ast.literal_eval.
        return None

def _encode_operand(token, pattern):
    """Encode one source operand against one table placeholder.

    Returns the list of items to emit (ints, or a single label name for an
    unresolved 16-bit operand), or ``None`` when *token* cannot stand in for
    *pattern*.
    """
    if pattern not in ("x", "(x)", "xx", "(xx)"):
        # A literal table token (register, condition, ...) that did not match.
        return None

    wrapped = token[0] == "(" and token[-1] == ")"
    if "(" in pattern:
        if not wrapped:
            return None
    elif "(" in token or ")" in token:
        # "ld hl,(1234h)" must not be accepted as the immediate "ld hl,xx".
        return None

    value = parse_int(token.replace("(", "").replace(")", "").strip())

    if pattern in ("x", "(x)"):
        if value is not None and 0 <= value <= 0xFF:
            return [value]
        return None

    if value is None:
        # Not a number: a bare name is kept as a label for the caller to
        # resolve; names inside parentheses are not supported.
        return None if wrapped else [token]
    if 0 <= value <= 0xFFFF:
        # The Z80 stores 16-bit operands low byte first.
        return [value & 0xFF, (value >> 8) & 0xFF]
    return None


def parse_inst(inst, opcodes):
    """Assemble one source line into a list of output items.

    Args:
        inst: The instruction text, without its label or comment.
        opcodes: Sequence of ``(tokens, machine_code)`` pairs as produced by
            ``parse_table``. It is scanned twice, so it must not be a
            one-shot iterator.

    Returns:
        A list whose items are ints (one output byte each) or strings (label
        names, each standing for a 16-bit address the caller must resolve),
        or ``None`` when no table entry matches the line.

    Raises:
        SyntaxError, ValueError: from ``ast.literal_eval`` when the operand
            list of a ``db`` line is not a valid list of literals.

    A ``db`` line is not looked up in the table: its comma-separated operands
    are evaluated as Python literals, integers are emitted as they are and
    strings are emitted one code point per character.
    """
    fields = inst.strip().split(None, 1)
    if fields and fields[0].lower() == "db":
        payload = fields[1] if len(fields) > 1 else ""
        out = []
        for item in ast.literal_eval("[" + payload + "]"):
            if isinstance(item, bool):
                # bool is a subclass of int; only real ints and strings are
                # emitted, anything else is ignored.
                continue
            if isinstance(item, int):
                out.append(item)
            elif isinstance(item, str):
                out.extend(map(ord, item))
        return out

    tokens = inst.replace(",", " ").split()
    lowered = [tok.lower() for tok in tokens]

    # Pass 1: an entry that matches token for token always wins. Without
    # this, "ld sp,hl" is captured by the earlier "ld sp,xx" entry with "hl"
    # taken as a label name.
    for opcode, mc in opcodes:
        if lowered == list(opcode):
            return [mc]

    # Pass 2: first entry, in table order, whose placeholders accept the
    # operands.
    for opcode, mc in opcodes:
        if len(tokens) != len(opcode):
            continue
        out = [mc]
        for token, low, pattern in zip(tokens, lowered, opcode):
            if low == pattern:
                continue
            encoded = _encode_operand(token, pattern)
            if encoded is None:
                break
            out.extend(encoded)
        else:
            return out
    return None

def parse_table(tbl):
    """Turn the tab-separated opcode chart into ``(tokens, opcode)`` pairs.

    The first line of *tbl* (column header) and the first cell of every
    following line (row label) are skipped. Each remaining cell is split into
    tokens on commas and whitespace and paired with the byte
    ``(row index << 4) + column index``.
    """
    rows = tbl.split("\n")[1:]
    return [
        (cell.replace(",", " ").split(), (high << 4) + low)
        for high, row in enumerate(rows)
        for low, cell in enumerate(row.split("\t")[1:])
    ]

def _assemble(lines, opcodes):
    """Assemble an iterable of source lines into the output image.

    Pass 1 strips comments, records each label at the current output offset
    and encodes every instruction with ``parse_inst``. Pass 2 replaces label
    references with their 16-bit address, low byte first.

    Raises:
        ValueError: for a line that cannot be assembled or a reference to a
            label that is never defined; the message names the problem.
    """
    label = re.compile(r"([a-zA-Z0-9_\.]+):")
    # NOTE: everything from the first ';' on is dropped, including a ';' that
    # appears inside a quoted string.
    comment = re.compile(r"([^;]*)(;.*)?")

    ops = []
    labels = {}
    address = 0  # offset of the next emitted byte from the start of output
    for lineno, raw in enumerate(lines, start=1):
        line = comment.match(raw).group(1)
        label_match = label.match(line)
        if label_match:
            labels[label_match.group(1)] = address
            line = line[label_match.end():]
        line = line.strip()
        if not line:
            continue
        try:
            encoded = parse_inst(line, opcodes)
        except (ValueError, SyntaxError) as err:
            raise ValueError("line %d: %s" % (lineno, err)) from err
        if encoded is None:
            raise ValueError("line %d: cannot assemble %r" % (lineno, line))
        ops += encoded
        # A label reference is a string and occupies two bytes once resolved.
        address += sum(1 if isinstance(x, int) else 2 for x in encoded)

    image = bytearray()
    for op in ops:
        if isinstance(op, str):
            if op not in labels:
                raise ValueError("undefined label %r" % op)
            target = labels[op]
            image.append(target & 0xff)
            image.append((target >> 8) & 0xff)
        else:
            image.append(op)
    return bytes(image)


if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.exit("Usage: asm.py input.asm output.bin")

    try:
        with open(sys.argv[1]) as f:
            image = _assemble(f, parse_table(tbl))
    except ValueError as err:
        # Report assembly errors as a message, not a traceback.
        sys.exit("asm.py: %s" % err)

    with open(sys.argv[2], 'wb') as out:
        out.write(image)
	#!/usr/bin/env python3
	# vim: ts=11

	# Copyright 2019 Google LLC
	# SPDX-License-Identifier: Apache-2.0
	#
	# A SUPER JANK Z80 assembler, written to see if writing an assembler that
	# parses the opcode table was reasonable.
	# (see https://twitter.com/WAptekar/status/1142828240874221568)
	#
	# It pretty much works, modulo a few things:
	# * Non-canonical representations are not supported (for instance, you must
	# write `sub 42` instead of `sub a, 42`).
	# * `djnz` is broken. This is fixable, but more effort than I want to put into
	# a quick hack like this (the assembler currently doesn't know about relative
	# offsets).
	# * Probably more stuff's broken as well?
	#
	# Usage: asm.py input.asm output.bin

	import sys, re, ast

	# http://z80-heaven.wikidot.com/opcode-reference-chart
	#
	# Tab-separated opcode chart: one header line, then one line per row with a
	# leading row label. parse_table() splits cells on "\t" and derives the
	# opcode byte as (row index << 4) + column index, so the separators must be
	# real tabs. They are written as explicit escapes here so that whitespace
	# normalisation of the source file cannot corrupt the table.
	#   x  / (x)   an 8-bit immediate operand
	#   xx / (xx)  a 16-bit immediate operand (a bare xx may also be a label)
	tbl = (
	    "\t0\t1\t2\t3\t4\t5\t6\t7\t8\t9\tA\tB\tC\tD\tE\tF\n"
	    "0\tnop\tld bc,xx\tld (bc),a\tinc bc\tinc b\tdec b\tld b,x\trlca\tex af,af'\tadd hl,bc\tld a,(bc)\tdec bc\tinc c\tdec c\tld c,x\trrca\n"
	    "1\tdjnz x\tld de,xx\tld (de),a\tinc de\tinc d\tdec d\tld d,x\trla\tjr x\tadd hl,de\tld a,(de)\tdec de\tinc e\tdec e\tld e,x\trra\n"
	    "2\tjr nz,x\tld hl,xx\tld (xx),hl\tinc hl\tinc h\tdec h\tld h,x\tdaa\tjr z,x\tadd hl,hl\tld hl,(xx)\tdec hl\tinc l\tdec l\tld l,x\tcpl\n"
	    "3\tjr nc,x\tld sp,xx\tld (xx),a\tinc sp\tinc (hl)\tdec (hl)\tld (hl),x\tscf\tjr c,x\tadd hl,sp\tld a,(xx)\tdec sp\tinc a\tdec a\tld a,x\tccf\n"
	    "4\tld b,b\tld b,c\tld b,d\tld b,e\tld b,h\tld b,l\tld b,(hl)\tld b,a\tld c,b\tld c,c\tld c,d\tld c,e\tld c,h\tld c,l\tld c,(hl)\tld c,a\n"
	    "5\tld d,b\tld d,c\tld d,d\tld d,e\tld d,h\tld d,l\tld d,(hl)\tld d,a\tld e,b\tld e,c\tld e,d\tld e,e\tld e,h\tld e,l\tld e,(hl)\tld e,a\n"
	    "6\tld h,b\tld h,c\tld h,d\tld h,e\tld h,h\tld h,l\tld h,(hl)\tld h,a\tld l,b\tld l,c\tld l,d\tld l,e\tld l,h\tld l,l\tld l,(hl)\tld l,a\n"
	    "7\tld (hl),b\tld (hl),c\tld (hl),d\tld (hl),e\tld (hl),h\tld (hl),l\thalt\tld (hl),a\tld a,b\tld a,c\tld a,d\tld a,e\tld a,h\tld a,l\tld a,(hl)\tld a,a\n"
	    "8\tadd a,b\tadd a,c\tadd a,d\tadd a,e\tadd a,h\tadd a,l\tadd a,(hl)\tadd a,a\tadc a,b\tadc a,c\tadc a,d\tadc a,e\tadc a,h\tadc a,l\tadc a,(hl)\tadc a,a\n"
	    "9\tsub b\tsub c\tsub d\tsub e\tsub h\tsub l\tsub (hl)\tsub a\tsbc a,b\tsbc a,c\tsbc a,d\tsbc a,e\tsbc a,h\tsbc a,l\tsbc a,(hl)\tsbc a,a\n"
	    "A\tand b\tand c\tand d\tand e\tand h\tand l\tand (hl)\tand a\txor b\txor c\txor d\txor e\txor h\txor l\txor (hl)\txor a\n"
	    "B\tor b\tor c\tor d\tor e\tor h\tor l\tor (hl)\tor a\tcp b\tcp c\tcp d\tcp e\tcp h\tcp l\tcp (hl)\tcp a\n"
	    "C\tret nz\tpop bc\tjp nz,xx\tjp xx\tcall nz,xx\tpush bc\tadd a,x\trst 00h\tret z\tret\tjp z,xx\txxBITxx\tcall z,xx\tcall xx\tadc a,x\trst 08h\n"
	    "D\tret nc\tpop de\tjp nc,xx\tout (x),a\tcall nc,xx\tpush de\tsub x\trst 10h\tret c\texx\tjp c,xx\tin a,(x)\tcall c,xx\txxIXxx\tsbc a,x\trst 18h\n"
	    "E\tret po\tpop hl\tjp po,xx\tex (sp),hl\tcall po,xx\tpush hl\tand x\trst 20h\tret pe\tjp (hl)\tjp pe,xx\tex de,hl\tcall pe,xx\txx80xx\txor x\trst 28h\n"
	    "F\tret p\tpop af\tjp p,xx\tdi\tcall p,xx\tpush af\tor x\trst 30h\tret m\tld sp,hl\tjp m,xx\tei\tcall m,xx\txxIYxx\tcp x\trst 38h"
	)

	def parse_int(s):
	    """Parse an assembler integer literal and return its value.

	    Recognised forms, checked in this order:

	    * a single quoted character such as ``'a'`` -> its code point,
	    * hexadecimal with an ``h`` suffix such as ``1fh``,
	    * hexadecimal with a ``0x`` prefix such as ``0x1f``,
	    * plain decimal such as ``42``.

	    Args:
	        s: The operand text, already stripped of parentheses and whitespace.

	    Returns:
	        The integer value, or ``None`` when *s* is not a literal in one of
	        the forms above (callers use this to fall back to label handling).
	    """
	    if not s:
	        # An empty operand is not a number, and indexing it below would
	        # raise IndexError.
	        return None
	    try:
	        if s[0] == "'" and s[-1] == "'":
	            return ord(ast.literal_eval(s))
	        if s[-1] == "h":
	            return int(s[:-1], base=16)
	        if s[:2] == "0x":
	            return int(s, base=16)
	        return int(s)
	    except (ValueError, TypeError, SyntaxError):
	        # ValueError: malformed digits. TypeError: ord() of a quoted string
	        # that is not exactly one character. SyntaxError: an invalid quoted
	        # literal given to ast.literal_eval.
	        return None

	def _encode_operand(token, pattern):
	    """Encode one source operand against one table placeholder.

	    Returns the list of items to emit (ints, or a single label name for an
	    unresolved 16-bit operand), or ``None`` when *token* cannot stand in for
	    *pattern*.
	    """
	    if pattern not in ("x", "(x)", "xx", "(xx)"):
	        # A literal table token (register, condition, ...) that did not match.
	        return None

	    wrapped = token[0] == "(" and token[-1] == ")"
	    if "(" in pattern:
	        if not wrapped:
	            return None
	    elif "(" in token or ")" in token:
	        # "ld hl,(1234h)" must not be accepted as the immediate "ld hl,xx".
	        return None

	    value = parse_int(token.replace("(", "").replace(")", "").strip())

	    if pattern in ("x", "(x)"):
	        if value is not None and 0 <= value <= 0xFF:
	            return [value]
	        return None

	    if value is None:
	        # Not a number: a bare name is kept as a label for the caller to
	        # resolve; names inside parentheses are not supported.
	        return None if wrapped else [token]
	    if 0 <= value <= 0xFFFF:
	        # The Z80 stores 16-bit operands low byte first.
	        return [value & 0xFF, (value >> 8) & 0xFF]
	    return None


	def parse_inst(inst, opcodes):
	    """Assemble one source line into a list of output items.

	    Args:
	        inst: The instruction text, without its label or comment.
	        opcodes: Sequence of ``(tokens, machine_code)`` pairs as produced by
	            ``parse_table``. It is scanned twice, so it must not be a
	            one-shot iterator.

	    Returns:
	        A list whose items are ints (one output byte each) or strings (label
	        names, each standing for a 16-bit address the caller must resolve),
	        or ``None`` when no table entry matches the line.

	    Raises:
	        SyntaxError, ValueError: from ``ast.literal_eval`` when the operand
	            list of a ``db`` line is not a valid list of literals.

	    A ``db`` line is not looked up in the table: its comma-separated operands
	    are evaluated as Python literals, integers are emitted as they are and
	    strings are emitted one code point per character.
	    """
	    fields = inst.strip().split(None, 1)
	    if fields and fields[0].lower() == "db":
	        payload = fields[1] if len(fields) > 1 else ""
	        out = []
	        for item in ast.literal_eval("[" + payload + "]"):
	            if isinstance(item, bool):
	                # bool is a subclass of int; only real ints and strings are
	                # emitted, anything else is ignored.
	                continue
	            if isinstance(item, int):
	                out.append(item)
	            elif isinstance(item, str):
	                out.extend(map(ord, item))
	        return out

	    tokens = inst.replace(",", " ").split()
	    lowered = [tok.lower() for tok in tokens]

	    # Pass 1: an entry that matches token for token always wins. Without
	    # this, "ld sp,hl" is captured by the earlier "ld sp,xx" entry with "hl"
	    # taken as a label name.
	    for opcode, mc in opcodes:
	        if lowered == list(opcode):
	            return [mc]

	    # Pass 2: first entry, in table order, whose placeholders accept the
	    # operands.
	    for opcode, mc in opcodes:
	        if len(tokens) != len(opcode):
	            continue
	        out = [mc]
	        for token, low, pattern in zip(tokens, lowered, opcode):
	            if low == pattern:
	                continue
	            encoded = _encode_operand(token, pattern)
	            if encoded is None:
	                break
	            out.extend(encoded)
	        else:
	            return out
	    return None

	def parse_table(tbl):
	    """Turn the tab-separated opcode chart into ``(tokens, opcode)`` pairs.

	    The first line of *tbl* (column header) and the first cell of every
	    following line (row label) are skipped. Each remaining cell is split into
	    tokens on commas and whitespace and paired with the byte
	    ``(row index << 4) + column index``.
	    """
	    rows = tbl.split("\n")[1:]
	    return [
	        (cell.replace(",", " ").split(), (high << 4) + low)
	        for high, row in enumerate(rows)
	        for low, cell in enumerate(row.split("\t")[1:])
	    ]

	def _assemble(lines, opcodes):
	    """Assemble an iterable of source lines into the output image.

	    Pass 1 strips comments, records each label at the current output offset
	    and encodes every instruction with ``parse_inst``. Pass 2 replaces label
	    references with their 16-bit address, low byte first.

	    Raises:
	        ValueError: for a line that cannot be assembled or a reference to a
	            label that is never defined; the message names the problem.
	    """
	    label = re.compile(r"([a-zA-Z0-9_\.]+):")
	    # Group 1 is everything before the first ';'. The '*' quantifiers are
	    # required: without them only a single character would be captured.
	    comment = re.compile(r"([^;]*)(;.*)?")

	    ops = []
	    labels = {}
	    address = 0  # offset of the next emitted byte from the start of output
	    for lineno, raw in enumerate(lines, start=1):
	        line = comment.match(raw).group(1)
	        label_match = label.match(line)
	        if label_match:
	            labels[label_match.group(1)] = address
	            line = line[label_match.end():]
	        line = line.strip()
	        if not line:
	            continue
	        try:
	            encoded = parse_inst(line, opcodes)
	        except (ValueError, SyntaxError) as err:
	            raise ValueError("line %d: %s" % (lineno, err)) from err
	        if encoded is None:
	            raise ValueError("line %d: cannot assemble %r" % (lineno, line))
	        ops += encoded
	        # A label reference is a string and occupies two bytes once resolved.
	        address += sum(1 if isinstance(x, int) else 2 for x in encoded)

	    image = bytearray()
	    for op in ops:
	        if isinstance(op, str):
	            if op not in labels:
	                raise ValueError("undefined label %r" % op)
	            target = labels[op]
	            image.append(target & 0xff)
	            image.append((target >> 8) & 0xff)
	        else:
	            image.append(op)
	    return bytes(image)


	if __name__ == "__main__":
	    if len(sys.argv) < 3:
	        sys.exit("Usage: asm.py input.asm output.bin")

	    try:
	        with open(sys.argv[1]) as f:
	            image = _assemble(f, parse_table(tbl))
	    except ValueError as err:
	        # Report assembly errors as a message, not a traceback.
	        sys.exit("asm.py: %s" % err)

	    with open(sys.argv[2], 'wb') as out:
	        out.write(image)