Skip to content

Instantly share code, notes, and snippets.

@WesleyAC
Created June 24, 2019 03:04
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save WesleyAC/3f4107976512d09517d6b2cf6f5ec7a2 to your computer and use it in GitHub Desktop.
Save WesleyAC/3f4107976512d09517d6b2cf6f5ec7a2 to your computer and use it in GitHub Desktop.
janky z80 assembler
#!/usr/bin/env python3
# vim: ts=11
# Copyright 2019 Google LLC
# SPDX-License-Identifier: Apache-2.0
#
# A SUPER JANK Z80 assembler, written to see if writing an assembler that parses
# the opcode table was reasonable.
# (see https://twitter.com/WAptekar/status/1142828240874221568)
#
# It pretty much works, modulo a few things:
# * Non-canonical representations are not supported (for instance, you must
# write `sub 42` instead of `sub a, 42`).
# * `djnz` is broken. This is fixable, but more effort than I want to put into
# a quick hack like this (the assembler currently doesn't know about relative
# offsets).
# * Probably more stuff's broken as well?
#
# Usage: asm.py input.asm output.bin
import sys, re, ast
# http://z80-heaven.wikidot.com/opcode-reference-chart
# Main (unprefixed) Z80 opcode chart.  Each row starts with the high nibble of
# the opcode; the 16 cells after it are the instructions for low nibbles 0..F.
# "x" marks an 8-bit operand and "xx" a 16-bit operand.  The xxBITxx / xxIXxx /
# xx80xx / xxIYxx cells are the chart's placeholders for the prefix bytes.
#
# The cells are kept as separate strings and joined with explicit "\t"
# characters so that the separator parse_table() splits on cannot be confused
# with the spaces that occur inside an instruction (e.g. "ld bc,xx").
_TBL_ROWS = (
    ("0", "nop", "ld bc,xx", "ld (bc),a", "inc bc", "inc b", "dec b", "ld b,x", "rlca",
     "ex af,af'", "add hl,bc", "ld a,(bc)", "dec bc", "inc c", "dec c", "ld c,x", "rrca"),
    ("1", "djnz x", "ld de,xx", "ld (de),a", "inc de", "inc d", "dec d", "ld d,x", "rla",
     "jr x", "add hl,de", "ld a,(de)", "dec de", "inc e", "dec e", "ld e,x", "rra"),
    ("2", "jr nz,x", "ld hl,xx", "ld (xx),hl", "inc hl", "inc h", "dec h", "ld h,x", "daa",
     "jr z,x", "add hl,hl", "ld hl,(xx)", "dec hl", "inc l", "dec l", "ld l,x", "cpl"),
    ("3", "jr nc,x", "ld sp,xx", "ld (xx),a", "inc sp", "inc (hl)", "dec (hl)", "ld (hl),x", "scf",
     "jr c,x", "add hl,sp", "ld a,(xx)", "dec sp", "inc a", "dec a", "ld a,x", "ccf"),
    ("4", "ld b,b", "ld b,c", "ld b,d", "ld b,e", "ld b,h", "ld b,l", "ld b,(hl)", "ld b,a",
     "ld c,b", "ld c,c", "ld c,d", "ld c,e", "ld c,h", "ld c,l", "ld c,(hl)", "ld c,a"),
    ("5", "ld d,b", "ld d,c", "ld d,d", "ld d,e", "ld d,h", "ld d,l", "ld d,(hl)", "ld d,a",
     "ld e,b", "ld e,c", "ld e,d", "ld e,e", "ld e,h", "ld e,l", "ld e,(hl)", "ld e,a"),
    ("6", "ld h,b", "ld h,c", "ld h,d", "ld h,e", "ld h,h", "ld h,l", "ld h,(hl)", "ld h,a",
     "ld l,b", "ld l,c", "ld l,d", "ld l,e", "ld l,h", "ld l,l", "ld l,(hl)", "ld l,a"),
    ("7", "ld (hl),b", "ld (hl),c", "ld (hl),d", "ld (hl),e", "ld (hl),h", "ld (hl),l", "halt", "ld (hl),a",
     "ld a,b", "ld a,c", "ld a,d", "ld a,e", "ld a,h", "ld a,l", "ld a,(hl)", "ld a,a"),
    ("8", "add a,b", "add a,c", "add a,d", "add a,e", "add a,h", "add a,l", "add a,(hl)", "add a,a",
     "adc a,b", "adc a,c", "adc a,d", "adc a,e", "adc a,h", "adc a,l", "adc a,(hl)", "adc a,a"),
    ("9", "sub b", "sub c", "sub d", "sub e", "sub h", "sub l", "sub (hl)", "sub a",
     "sbc a,b", "sbc a,c", "sbc a,d", "sbc a,e", "sbc a,h", "sbc a,l", "sbc a,(hl)", "sbc a,a"),
    ("A", "and b", "and c", "and d", "and e", "and h", "and l", "and (hl)", "and a",
     "xor b", "xor c", "xor d", "xor e", "xor h", "xor l", "xor (hl)", "xor a"),
    ("B", "or b", "or c", "or d", "or e", "or h", "or l", "or (hl)", "or a",
     "cp b", "cp c", "cp d", "cp e", "cp h", "cp l", "cp (hl)", "cp a"),
    ("C", "ret nz", "pop bc", "jp nz,xx", "jp xx", "call nz,xx", "push bc", "add a,x", "rst 00h",
     "ret z", "ret", "jp z,xx", "xxBITxx", "call z,xx", "call xx", "adc a,x", "rst 08h"),
    ("D", "ret nc", "pop de", "jp nc,xx", "out (x),a", "call nc,xx", "push de", "sub x", "rst 10h",
     "ret c", "exx", "jp c,xx", "in a,(x)", "call c,xx", "xxIXxx", "sbc a,x", "rst 18h"),
    ("E", "ret po", "pop hl", "jp po,xx", "ex (sp),hl", "call po,xx", "push hl", "and x", "rst 20h",
     "ret pe", "jp (hl)", "jp pe,xx", "ex de,hl", "call pe,xx", "xx80xx", "xor x", "rst 28h"),
    ("F", "ret p", "pop af", "jp p,xx", "di", "call p,xx", "push af", "or x", "rst 30h",
     "ret m", "ld sp,hl", "jp m,xx", "ei", "call m,xx", "xxIYxx", "cp x", "rst 38h"),
)
# First line is the column header (empty corner cell, then 0..F); every later
# line is one row of the chart.  Lines are "\n"-separated, cells "\t"-separated.
tbl = "\n".join(
    "\t".join(row)
    for row in ((("",) + tuple("0123456789ABCDEF")),) + _TBL_ROWS
)
def parse_int(s):
    """Parse an integer operand, returning None when *s* is not a number.

    Accepted forms:
      * a quoted single character (``'a'``), which yields its code point,
      * hexadecimal with an ``h`` suffix (``1fh``),
      * hexadecimal with a ``0x`` prefix (``0x1f``),
      * plain decimal (``42``).

    Anything else (register names, labels, empty text, malformed literals)
    yields None so the caller can try another interpretation.
    """
    if not s:
        # Indexing s[0] below would raise IndexError on empty text.
        return None
    try:
        if s[0] == "'" and s[-1] == "'":
            return ord(ast.literal_eval(s))
        elif s[-1] == "h":
            return int(s[:-1], base=16)
        elif s[:2] == "0x":
            return int(s, base=16)
        return int(s)
    except (ValueError, TypeError, SyntaxError):
        # ValueError: not a valid number; TypeError: ord() on a literal that
        # is not exactly one character; SyntaxError: malformed quoted literal.
        return None
def parse_inst(inst, opcodes):
    """Assemble one source line into a list of output items.

    Args:
        inst: text of a single instruction (label and comment already removed).
        opcodes: sequence of ``(tokens, byte)`` pairs as built by
            ``parse_table``; ``tokens`` is the instruction template split on
            commas/whitespace, with ``x`` / ``(x)`` standing for an 8-bit
            operand and ``xx`` / ``(xx)`` for a 16-bit operand.

    Returns:
        A list whose items are ints (literal output bytes) or strs (label
        names the caller must resolve to a 16-bit address), or None when no
        template matches the line.

    A line starting with ``db`` is a data directive: the rest of the line is
    evaluated as a comma-separated list of Python literals; ints are emitted
    as-is and strings are emitted one code point per character.
    """
    stripped = inst.strip()
    if stripped[:2] == "db":
        out = []
        # Slice the stripped text so leading indentation cannot shift the
        # slice into the directive itself.
        for item in ast.literal_eval("[" + stripped[2:] + "]"):
            if type(item) is int:
                out.append(item)
            elif type(item) is str:
                out += map(ord, item)
        return out

    tokens = inst.replace(",", " ").split()
    lowered = [tok.lower() for tok in tokens]

    # Prefer a template that matches literally.  Without this, a register
    # operand can be captured as a label by an earlier "xx" template
    # (e.g. "ld sp,hl" matching "ld sp,xx").
    for opcode, mc in opcodes:
        if list(opcode) == lowered:
            return [mc]

    for opcode, mc in opcodes:
        if len(tokens) != len(opcode):
            continue
        out = [mc]
        for tok, pattern in zip(tokens, opcode):
            if tok.lower() == pattern:
                continue
            # A parenthesised template slot needs a parenthesised operand.
            if "(" in pattern and not (tok[0] == "(" and tok[-1] == ")"):
                break
            n = parse_int(tok.replace("(", "").replace(")", "").strip())
            if pattern in ("x", "(x)"):
                if n is None or not 0 <= n < 2**8:
                    break
                out.append(n)
            elif pattern in ("xx", "(xx)"):
                if n is None:
                    if "(" in tok:
                        break
                    # Not a number: keep the name for later label resolution.
                    out.append(tok)
                elif 0 <= n < 2**16:
                    # Z80 stores 16-bit immediates low byte first.
                    out.append(n & 0xff)
                    out.append((n >> 8) & 0xff)
                else:
                    break
            else:
                break
        else:
            # Every token matched this template.
            return out
    return None
def parse_table(tbl):
    """Turn the opcode chart text into a list of ``(tokens, byte)`` pairs.

    The first line of *tbl* and the first tab-separated cell of every other
    line are headers and are skipped.  For the remaining cells the row index
    supplies the high nibble and the column index the low nibble of the
    opcode byte; the cell text is split on commas and whitespace into tokens.
    """
    body_rows = tbl.split("\n")[1:]
    return [
        (cell.replace(",", " ").split(), (high << 4) + low)
        for high, row in enumerate(body_rows)
        for low, cell in enumerate(row.split("\t")[1:])
    ]
if __name__ == "__main__":
    # Script entry point: assemble sys.argv[1] and write the raw bytes to
    # sys.argv[2].
    if len(sys.argv) != 3:
        sys.exit("Usage: asm.py input.asm output.bin")
    opcodes = parse_table(tbl)
    label = re.compile(r"([a-zA-Z0-9_\.]+):")
    comment = re.compile(r"([^;]*)(;.*)?")
    ops = []      # ints are output bytes, strs are label references
    labels = {}   # label name -> byte offset from the start of the output
    address = 0   # running output size, so labels need no re-scan of ops
    with open(sys.argv[1]) as f:
        for lineno, line in enumerate(f, start=1):
            # Drop everything from the first ';' onwards.
            line = comment.match(line).group(1)
            label_match = label.match(line)
            if label_match:
                labels[label_match.group(1)] = address
                line = line[label_match.end():]
            if not line.strip():
                continue
            encoded = parse_inst(line, opcodes)
            if encoded is None:
                sys.exit("{}:{}: cannot assemble: {}".format(
                    sys.argv[1], lineno, line.strip()))
            ops += encoded
            # A label reference is later expanded to a two-byte address.
            address += sum(1 if isinstance(x, int) else 2 for x in encoded)
    new_ops = []
    for op in ops:
        if isinstance(op, int):
            new_ops.append(op)
        elif isinstance(op, str):
            if op not in labels:
                sys.exit("{}: undefined label: {}".format(sys.argv[1], op))
            # Addresses are written low byte first.
            new_ops.append(labels[op] & 0xff)
            new_ops.append((labels[op] >> 8) & 0xff)
    with open(sys.argv[2], 'wb') as out:
        out.write(bytes(new_ops))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment