Created
January 21, 2020 05:51
-
-
Save lubieowoce/148406b1dd61d91b1584339ffefbdcc6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"disassemble femtoLisp bytecode (kind of)" | |
""" | |
A serialized function looks like this:
#fn(
":000r1|Mc0<17702|M]<6@0|N\x8550|M;c1|NK;|N\x85@0c2|Mi10~N31L3;|\x84c3\x82W0e4e5|31316A0c6qe7e5|313141;c8qc93041;c:|Mc1|NKi10~N31L4;"
[
else begin or => 1arg-lambda? caddr
#fn("=000r1c0|~ML2L1c1|c2e3e4~3131Ki20i10N31L4L3;" [let if begin cddr caddr])
caadr
#fn("<000r1c0|~ML2L1c1|e2~31|L2i20i10N31L4L3;" [let if caddr])
#fn(gensym) if
]
)
Taken from:
https://github.com/JeffBezanson/femtolisp/blob/dc621773236a55a441dc2dce6b1c5d08e7fa10a1/flisp.boot
""" | |
# Sample input for the disassembler: the bytecode string and the value vector
# of the serialized function quoted in the note above.  The bytecode is in the
# printable form (every byte shifted up by 48), and it includes the final ';'
# (the closing `ret`), which an earlier transcription of this literal dropped.
code = b':000r1|Mc0<17702|M]<6@0|N\x8550|M;c1|NK;|N\x85@0c2|Mi10~N31L3;|\x84c3\x82W0e4e5|31316A0c6qe7e5|313141;c8qc93041;c:|Mc1|NKi10~N31L4;'

# Value vector of the sample function; `loadv`/`loadg`/`setg` operands index it.
# Nested functions are kept as their printed text.
vals = [
    'else', 'begin', 'or', '=>', '1arg-lambda?', 'caddr',
    '#fn("=000r1c0|~ML2L1c1|c2e3e4~3131Ki20i10N31L4L3;" [let if begin cddr caddr])',
    'caadr',
    '#fn("<000r1c0|~ML2L1c1|e2~31|L2i20i10N31L4L3;" [let if caddr])',
    '#fn(gensym)', 'if',
]
def main():
    """Print the disassembly of the sample function defined in this module."""
    disassemble(code, func_vals=vals)
# from: | |
# https://github.com/JeffBezanson/femtolisp/blob/dc621773236a55a441dc2dce6b1c5d08e7fa10a1/compiler.lsp | |
_insts = """nop dup pop call tcall jmp brf brt jmp.l brf.l brt.l ret | |
eq? eqv? equal? atom? not null? boolean? symbol? | |
number? bound? pair? builtin? vector? fixnum? function? | |
cons list car cdr set-car! set-cdr! | |
apply | |
+ - * / div0 = < compare | |
vector aref aset! | |
loadt loadf loadnil load0 load1 loadi8 | |
loadv loadv.l | |
loadg loadg.l | |
loada loada.l loadc loadc.l | |
setg setg.l | |
seta seta.l setc setc.l | |
closure argc vargc trycatch for tapply | |
add2 sub2 neg largc lvargc | |
loada0 loada1 loadc00 loadc01 call.l tcall.l | |
brne brne.l cadr brnn brnn.l brn brn.l | |
optargs brbound keyargs | |
dummy_t dummy_f dummy_nil""" | |
insts = dict(enumerate(_insts.split())) | |
import struct | |
def disassemble(code, func_vals=None):
    """Print a listing of femtoLisp bytecode and return its decoded form.

    Parameters:
        code: bytes-like object.  The first four bytes are a little-endian
            signed integer header; opcodes and their operands follow.  If the
            byte at index 4 is not a valid opcode, the buffer is taken to be
            in the printable form (every byte shifted up by 48) and is shifted
            back before decoding.
        func_vals: optional sequence of the function's values, used to
            annotate instructions for which ``uses_vals`` is true.

    Returns:
        ``(max_stack, res)`` where ``max_stack`` is the decoded header and
        ``res`` is a list of ``(instruction, args)`` pairs.  ``instruction``
        is the mnemonic, or the raw opcode number when it is unknown.

    Raises:
        Exception: wrapping ``(message, position, decoded_so_far)`` when an
            instruction cannot be decoded (for example a truncated operand);
            the underlying error is chained as ``__cause__``.
    """
    if func_vals is None:
        func_vals = []

    # https://github.com/JeffBezanson/femtolisp/blob/ec7601076a976f845bc05ad6bd3ed5b8cde58a97/flisp.c#L2038
    # Valid opcodes are 0 .. len(insts) - 1, so anything >= len(insts) marks
    # the shifted text form.  The length guard lets header-only input through,
    # and the mask keeps bytes below 48 from raising in bytes().
    if len(code) > 4 and code[4] >= len(insts):
        code = bytes((b - 48) & 0xFF for b in code)

    res = []
    max_stack = i32_le(code[0:4])
    i = 4
    while i < len(code):
        inst_loc = i
        byte = code[i]
        name = insts.get(byte)
        # Unknown opcodes are listed by number instead of aborting the listing.
        shown = byte if name is None else name
        try:
            i += 1
            args = []
            offsets = arg_offsets(name) if name is not None else []
            for off in offsets:
                if off == 1:
                    val = code[i]
                else:
                    val = {2: i16_le, 4: i32_le}[off](code[i:i + off])
                args.append(val)
                i += off

            res.append((shown, args))

            out = ' '.join((
                '{:>4} '.format(inst_loc),
                '{:<7}'.format(shown),
                *('{:>3}'.format(a) for a in args),
            ))
            if name is not None:
                if is_jump(name):
                    # Branch offsets are relative to the operand, i.e. the
                    # byte right after the opcode, hence the + 1.
                    out += ' | ' + str(inst_loc + 1 + args[0])
                elif uses_vals(name):
                    out += ' | ' + str(list_get(func_vals, get_val_index(name, args), ''))
            print(out)
        except Exception as e:
            # Build the message defensively: `i` may already be past the end,
            # and every piece must be converted to text before joining.
            if i < len(code):
                at_i = '{} = {!r}'.format(code[i], chr(code[i]))
            else:
                at_i = '<end of code>'
            msg = 'error at i = {} inst = {} code[i] = {}'.format(i, shown, at_i)
            raise Exception((msg, i, res)) from e
    return (max_stack, res)
def is_jump(inst):
    """Tell whether `inst` is one of the branch/jump mnemonics (short or `.l`)."""
    jump_names = (
        'jmp', 'brf', 'brt', 'brne', 'brnn', 'brn',
        'jmp.l', 'brf.l', 'brt.l', 'brne.l', 'brnn.l', 'brn.l',
    )
    return inst in jump_names
def uses_vals(inst):
    """Return True if `inst` is annotated with an entry of the value vector.

    Membership is tested against a tuple of exact mnemonics.  Testing against
    a single space-separated string would be a substring test, which accepts
    fragments such as ``''`` or ``'load'`` and raises TypeError for a
    non-string `inst`.
    """
    # NOTE(review): load0/load1 take no operand and are mapped to indices 0/1
    # by get_val_index; confirm they refer to the value vector rather than to
    # the literal numbers 0 and 1.
    return inst in (
        'loadv.l', 'loadg.l', 'setg.l',
        'loadv', 'loadg', 'setg',
        'load0', 'load1',
    )
def get_val_index(inst, args):
    """Return the value-vector index that `inst` refers to, or None.

    For the ``loadv``/``loadg``/``setg`` family (short and ``.l`` forms) the
    index is the first decoded operand.  For ``load0``/``load1`` it is the
    digit at the end of the mnemonic.  Any other instruction yields None.

    Mnemonics are matched exactly; a substring test against one long string
    would also accept fragments such as ``'load'``.
    """
    if inst in ('loadv.l', 'loadg.l', 'setg.l', 'loadv', 'loadg', 'setg'):
        return args[0]
    if inst in ('load0', 'load1'):
        return int(inst[-1])
    return None
# (mnemonics, operand widths in bytes) pairs, checked in order.
_ARG_WIDTH_GROUPS = (
    (('loadv.l', 'loadg.l', 'setg.l'), (4,)),
    (('loadv', 'loadg', 'setg'), (1,)),
    (('loada', 'seta', 'call', 'tcall', 'list', '+', '-', '*', '/',
      'vector', 'argc', 'vargc', 'loadi8', 'apply', 'tapply'), (1,)),
    (('loada.l', 'seta.l', 'largc', 'lvargc', 'call.l', 'tcall.l'), (4,)),
    (('loadc', 'setc'), (1, 1)),
    (('loadc.l', 'setc.l', 'optargs'), (4, 4)),
    (('keyargs',), (4, 4, 4)),
    (('brbound',), (4,)),
    (('jmp', 'brf', 'brt', 'brne', 'brnn', 'brn'), (2,)),
    (('jmp.l', 'brf.l', 'brt.l', 'brne.l', 'brnn.l', 'brn.l'), (4,)),
)


def arg_offsets(inst):
    """Return the byte widths of the operands that follow `inst`.

    The result is a fresh list with one width (1, 2 or 4) per operand, in
    order; instructions that are not listed take no operands and yield ``[]``.
    """
    for mnemonics, widths in _ARG_WIDTH_GROUPS:
        if inst in mnemonics:
            return list(widths)
    return []
def list_get(xs, i, default=None):
    """Return ``xs[i]``, or `default` when `i` is outside the sequence.

    Negative indices count from the end as usual; one that reaches before the
    first element yields `default` instead of raising IndexError.
    """
    if -len(xs) <= i < len(xs):
        return xs[i]
    return default
def i32_le(bs):
    """Decode `bs` (exactly four bytes) as a signed little-endian 32-bit int."""
    (value,) = struct.unpack('<i', bs)
    return value
def i16_le(bs):
    """Decode `bs` (exactly two bytes) as a signed little-endian 16-bit int."""
    (value,) = struct.unpack('<h', bs)
    return value
# Run the sample disassembly only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment