A minimal disassembler, with an example of code instrumentation
Last active
May 16, 2023 03:05
-
-
Save andy0130tw/f21698b9a3f8c0483a24b9d2238081c3 to your computer and use it in GitHub Desktop.
Dumping opcodes from C code generated by QuickJS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import struct | |
def make_table(lns):
    """Parse an opcode listing into a 256-slot table indexed by opcode value.

    Each non-blank line of *lns* has the form::

        <hex opcode> = <name> <size> <n_pop> <n_push> <format>

    Whitespace between the fields after ``' = '`` may be of any width.

    Returns a list of 256 entries.  A slot is ``None`` when the opcode is not
    listed, otherwise ``[name, size, n_pop, n_push, format]`` with the three
    numeric fields converted to ``int``.

    If the same opcode value is listed more than once, the FIRST listing is
    kept.  The listing may contain a second range of "temporary" opcodes that
    reuses values already assigned to regular opcodes; letting those later
    rows win would make the table decode regular opcodes incorrectly.

    Raises ``ValueError`` / ``IndexError`` on a malformed line.
    """
    table = [None] * 256
    for line in lns.splitlines():
        if not line.strip():
            # Tolerate blank lines (and a completely empty listing).
            continue
        code, _, rest = line.partition(' = ')
        n = int(code, 16)
        if table[n] is not None:
            # First definition wins; see the docstring.
            continue
        desc = rest.split()
        for i in (1, 2, 3):  # size, n_pop, n_push
            desc[i] = int(desc[i])
        table[n] = desc
    return table
# Opcode tables.  Each row is
#     <hex opcode> = <name> <size in bytes> <n_pop> <n_push> <operand format>
#
# `opcodes` holds the regular and short opcodes (00..f5).  The temporary
# opcodes reuse the values b4..c2, which are already taken by short opcodes,
# so they live in their own table (`temp_opcodes`) instead of sharing one
# 256-slot table where one set of rows would overwrite the other.
opcodes = make_table('''\
00 = invalid 1 0 0 none
01 = push_i32 5 0 1 i32
02 = push_const 5 0 1 const
03 = fclosure 5 0 1 const
04 = push_atom_value 5 0 1 atom
05 = private_symbol 5 0 1 atom
06 = undefined 1 0 1 none
07 = null 1 0 1 none
08 = push_this 1 0 1 none
09 = push_false 1 0 1 none
0a = push_true 1 0 1 none
0b = object 1 0 1 none
0c = special_object 2 0 1 u8
0d = rest 3 0 1 u16
0e = drop 1 1 0 none
0f = nip 1 2 1 none
10 = nip1 1 3 2 none
11 = dup 1 1 2 none
12 = dup1 1 2 3 none
13 = dup2 1 2 4 none
14 = dup3 1 3 6 none
15 = insert2 1 2 3 none
16 = insert3 1 3 4 none
17 = insert4 1 4 5 none
18 = perm3 1 3 3 none
19 = perm4 1 4 4 none
1a = perm5 1 5 5 none
1b = swap 1 2 2 none
1c = swap2 1 4 4 none
1d = rot3l 1 3 3 none
1e = rot3r 1 3 3 none
1f = rot4l 1 4 4 none
20 = rot5l 1 5 5 none
21 = call_constructor 3 2 1 npop
22 = call 3 1 1 npop
23 = tail_call 3 1 0 npop
24 = call_method 3 2 1 npop
25 = tail_call_method 3 2 0 npop
26 = array_from 3 0 1 npop
27 = apply 3 3 1 u16
28 = return 1 1 0 none
29 = return_undef 1 0 0 none
2a = check_ctor_return 1 1 2 none
2b = check_ctor 1 0 0 none
2c = check_brand 1 2 2 none
2d = add_brand 1 2 0 none
2e = return_async 1 1 0 none
2f = throw 1 1 0 none
30 = throw_error 6 0 0 atom_u8
31 = eval 5 1 1 npop_u16
32 = apply_eval 3 2 1 u16
33 = regexp 1 2 1 none
34 = get_super 1 1 1 none
35 = import 1 1 1 none
36 = check_var 5 0 1 atom
37 = get_var_undef 5 0 1 atom
38 = get_var 5 0 1 atom
39 = put_var 5 1 0 atom
3a = put_var_init 5 1 0 atom
3b = put_var_strict 5 2 0 atom
3c = get_ref_value 1 2 3 none
3d = put_ref_value 1 3 0 none
3e = define_var 6 0 0 atom_u8
3f = check_define_var 6 0 0 atom_u8
40 = define_func 6 1 0 atom_u8
41 = get_field 5 1 1 atom
42 = get_field2 5 1 2 atom
43 = put_field 5 2 0 atom
44 = get_private_field 1 2 1 none
45 = put_private_field 1 3 0 none
46 = define_private_field 1 3 1 none
47 = get_array_el 1 2 1 none
48 = get_array_el2 1 2 2 none
49 = put_array_el 1 3 0 none
4a = get_super_value 1 3 1 none
4b = put_super_value 1 4 0 none
4c = define_field 5 2 1 atom
4d = set_name 5 1 1 atom
4e = set_name_computed 1 2 2 none
4f = set_proto 1 2 1 none
50 = set_home_object 1 2 2 none
51 = define_array_el 1 3 2 none
52 = append 1 3 2 none
53 = copy_data_properties 2 3 3 u8
54 = define_method 6 2 1 atom_u8
55 = define_method_computed 2 3 1 u8
56 = define_class 6 2 2 atom_u8
57 = define_class_computed 6 3 3 atom_u8
58 = get_loc 3 0 1 loc
59 = put_loc 3 1 0 loc
5a = set_loc 3 1 1 loc
5b = get_arg 3 0 1 arg
5c = put_arg 3 1 0 arg
5d = set_arg 3 1 1 arg
5e = get_var_ref 3 0 1 var_ref
5f = put_var_ref 3 1 0 var_ref
60 = set_var_ref 3 1 1 var_ref
61 = set_loc_uninitialized 3 0 0 loc
62 = get_loc_check 3 0 1 loc
63 = put_loc_check 3 1 0 loc
64 = put_loc_check_init 3 1 0 loc
65 = get_var_ref_check 3 0 1 var_ref
66 = put_var_ref_check 3 1 0 var_ref
67 = put_var_ref_check_init 3 1 0 var_ref
68 = close_loc 3 0 0 loc
69 = if_false 5 1 0 label
6a = if_true 5 1 0 label
6b = goto 5 0 0 label
6c = catch 5 0 1 label
6d = gosub 5 0 0 label
6e = ret 1 1 0 none
6f = to_object 1 1 1 none
70 = to_propkey 1 1 1 none
71 = to_propkey2 1 2 2 none
72 = with_get_var 10 1 0 atom_label_u8
73 = with_put_var 10 2 1 atom_label_u8
74 = with_delete_var 10 1 0 atom_label_u8
75 = with_make_ref 10 1 0 atom_label_u8
76 = with_get_ref 10 1 0 atom_label_u8
77 = with_get_ref_undef 10 1 0 atom_label_u8
78 = make_loc_ref 7 0 2 atom_u16
79 = make_arg_ref 7 0 2 atom_u16
7a = make_var_ref_ref 7 0 2 atom_u16
7b = make_var_ref 5 0 2 atom
7c = for_in_start 1 1 1 none
7d = for_of_start 1 1 3 none
7e = for_await_of_start 1 1 3 none
7f = for_in_next 1 1 3 none
80 = for_of_next 2 3 5 u8
81 = iterator_check_object 1 1 1 none
82 = iterator_get_value_done 1 1 2 none
83 = iterator_close 1 3 0 none
84 = iterator_close_return 1 4 4 none
85 = iterator_next 1 4 4 none
86 = iterator_call 2 4 5 u8
87 = initial_yield 1 0 0 none
88 = yield 1 1 2 none
89 = yield_star 1 1 2 none
8a = async_yield_star 1 1 2 none
8b = await 1 1 1 none
8c = neg 1 1 1 none
8d = plus 1 1 1 none
8e = dec 1 1 1 none
8f = inc 1 1 1 none
90 = post_dec 1 1 2 none
91 = post_inc 1 1 2 none
92 = dec_loc 2 0 0 loc8
93 = inc_loc 2 0 0 loc8
94 = add_loc 2 1 0 loc8
95 = not 1 1 1 none
96 = lnot 1 1 1 none
97 = typeof 1 1 1 none
98 = delete 1 2 1 none
99 = delete_var 5 0 1 atom
9a = mul 1 2 1 none
9b = div 1 2 1 none
9c = mod 1 2 1 none
9d = add 1 2 1 none
9e = sub 1 2 1 none
9f = pow 1 2 1 none
a0 = shl 1 2 1 none
a1 = sar 1 2 1 none
a2 = shr 1 2 1 none
a3 = lt 1 2 1 none
a4 = lte 1 2 1 none
a5 = gt 1 2 1 none
a6 = gte 1 2 1 none
a7 = instanceof 1 2 1 none
a8 = in 1 2 1 none
a9 = eq 1 2 1 none
aa = neq 1 2 1 none
ab = strict_eq 1 2 1 none
ac = strict_neq 1 2 1 none
ad = and 1 2 1 none
ae = xor 1 2 1 none
af = or 1 2 1 none
b0 = is_undefined_or_null 1 1 1 none
b1 = mul_pow10 1 2 1 none
b2 = math_mod 1 2 1 none
b3 = nop 1 0 0 none
b4 = push_minus1 1 0 1 none_int
b5 = push_0 1 0 1 none_int
b6 = push_1 1 0 1 none_int
b7 = push_2 1 0 1 none_int
b8 = push_3 1 0 1 none_int
b9 = push_4 1 0 1 none_int
ba = push_5 1 0 1 none_int
bb = push_6 1 0 1 none_int
bc = push_7 1 0 1 none_int
bd = push_i8 2 0 1 i8
be = push_i16 3 0 1 i16
bf = push_const8 2 0 1 const8
c0 = fclosure8 2 0 1 const8
c1 = push_empty_string 1 0 1 none
c2 = get_loc8 2 0 1 loc8
c3 = put_loc8 2 1 0 loc8
c4 = set_loc8 2 1 1 loc8
c5 = get_loc0 1 0 1 none_loc
c6 = get_loc1 1 0 1 none_loc
c7 = get_loc2 1 0 1 none_loc
c8 = get_loc3 1 0 1 none_loc
c9 = put_loc0 1 1 0 none_loc
ca = put_loc1 1 1 0 none_loc
cb = put_loc2 1 1 0 none_loc
cc = put_loc3 1 1 0 none_loc
cd = set_loc0 1 1 1 none_loc
ce = set_loc1 1 1 1 none_loc
cf = set_loc2 1 1 1 none_loc
d0 = set_loc3 1 1 1 none_loc
d1 = get_arg0 1 0 1 none_arg
d2 = get_arg1 1 0 1 none_arg
d3 = get_arg2 1 0 1 none_arg
d4 = get_arg3 1 0 1 none_arg
d5 = put_arg0 1 1 0 none_arg
d6 = put_arg1 1 1 0 none_arg
d7 = put_arg2 1 1 0 none_arg
d8 = put_arg3 1 1 0 none_arg
d9 = set_arg0 1 1 1 none_arg
da = set_arg1 1 1 1 none_arg
db = set_arg2 1 1 1 none_arg
dc = set_arg3 1 1 1 none_arg
dd = get_var_ref0 1 0 1 none_var_ref
de = get_var_ref1 1 0 1 none_var_ref
df = get_var_ref2 1 0 1 none_var_ref
e0 = get_var_ref3 1 0 1 none_var_ref
e1 = put_var_ref0 1 1 0 none_var_ref
e2 = put_var_ref1 1 1 0 none_var_ref
e3 = put_var_ref2 1 1 0 none_var_ref
e4 = put_var_ref3 1 1 0 none_var_ref
e5 = set_var_ref0 1 1 1 none_var_ref
e6 = set_var_ref1 1 1 1 none_var_ref
e7 = set_var_ref2 1 1 1 none_var_ref
e8 = set_var_ref3 1 1 1 none_var_ref
e9 = get_length 1 1 1 none
ea = if_false8 2 1 0 label8
eb = if_true8 2 1 0 label8
ec = goto8 2 0 0 label8
ed = goto16 3 0 0 label16
ee = call0 1 1 1 npopx
ef = call1 1 1 1 npopx
f0 = call2 1 1 1 npopx
f1 = call3 1 1 1 npopx
f2 = is_undefined 1 1 1 none
f3 = is_null 1 1 1 none
f4 = typeof_is_undefined 1 1 1 none
f5 = typeof_is_function 1 1 1 none\
''')

# Temporary opcodes: same row format, values overlapping b4..c2 above.
temp_opcodes = make_table('''\
b4 = enter_scope 3 0 0 u16
b5 = leave_scope 3 0 0 u16
b6 = label 5 0 0 label
b7 = scope_get_var_undef 7 0 1 atom_u16
b8 = scope_get_var 7 0 1 atom_u16
b9 = scope_put_var 7 1 0 atom_u16
ba = scope_delete_var 7 0 1 atom_u16
bb = scope_make_ref 11 0 2 atom_label_u16
bc = scope_get_ref 7 0 2 atom_u16
bd = scope_put_var_init 7 0 2 atom_u16
be = scope_get_private_field 7 1 1 atom_u16
bf = scope_get_private_field2 7 1 2 atom_u16
c0 = scope_put_private_field 7 1 1 atom_u16
c1 = set_class_name 5 1 1 u32
c2 = line_num 5 0 0 u32\
''')
# struct formats for operand types that map onto fixed-width fields.
# '<' (little-endian, standard sizes) agrees with the byte-wise fallback in
# decode_operand(), which also assembles operands least-significant byte first.
# NOTE(review): label8 is decoded as signed to match label16, which was
# already signed here — confirm against the interpreter's jump handling.
OPERAND_FORMATS = {
    'u8': '<B', 'i8': '<b',
    'u16': '<H', 'i16': '<h',      # H = unsigned short, h = signed short
    'u32': '<L', 'i32': '<l',      # L = unsigned 32-bit, l = signed 32-bit
    'loc8': '<B',
    'const8': '<B',
    'label8': '<b',
    'label16': '<h',
    'npop_u16': '<HH',             # two 16-bit fields (4 operand bytes)
}


def decode_operand(typ, bs):
    """Decode the operand bytes *bs* of an instruction whose format is *typ*.

    Returns an ``int`` for single-field formats and a tuple of ints for
    multi-field formats (``npop_u16``).  Formats without an entry in
    ``OPERAND_FORMATS``, or whose operand length does not match the struct
    format, are returned as one unsigned little-endian integer built from
    all operand bytes.
    """
    fmt = OPERAND_FORMATS.get(typ)
    if fmt is not None and struct.calcsize(fmt) == len(bs):
        fields = struct.unpack(fmt, bs)
        return fields[0] if len(fields) == 1 else fields
    return int.from_bytes(bs, 'little')


def disassemble(code, table):
    """Yield one formatted listing line per instruction in *code*.

    *code* is a sequence of byte values; *table* is a 256-slot opcode table
    whose entries are ``[name, size, n_pop, n_push, format]`` or ``None``.

    A line is the instruction's bytes in hex, padded to 30 columns, followed
    by the opcode name and, for instructions with operands, the operand
    format in parentheses and the decoded operand.

    Raises ``ValueError`` for an opcode with no table entry, or when the
    last instruction's operand bytes run past the end of *code*.
    """
    pos = 0
    end = len(code)
    while pos < end:
        op = code[pos]
        entry = table[op] if 0 <= op < len(table) else None
        if entry is None:
            raise ValueError(f'Unknown opcode "0x{op:02x}"')
        name, size, _n_pop, _n_push, typ = entry
        if pos + size > end:
            raise ValueError(
                f'Truncated instruction "{name}" at offset {pos}: '
                f'needs {size} bytes, only {end - pos} left')
        raw = bytes(code[pos:pos + size])
        dump = ' '.join(f'{b:02x}' for b in raw)
        if size == 1:
            yield f'{dump:30} {name}'
        else:
            data = decode_operand(typ, raw[1:])
            yield f'{dump:30} {name} ({typ}) {data}'
        pos += size


# Sample bytecode to disassemble.
prog = [
    0x08, 0xea, 0x05, 0xc0, 0x00, 0xe2, 0x29, 0x29,
]

if __name__ == '__main__':
    for line in disassemble(prog, opcodes):
        print(line)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "quickjs-libc.h" | |
/* Serialized object passed to JS_ReadObject() in main() below, together with
 * its length in bytes.  The blob embeds the ASCII strings "console", "log",
 * "Hello World" and "examples/hello.js". */
const uint32_t qjsc_hello_size = 87;
const uint8_t qjsc_hello[87] = {
0x02, 0x04, 0x0e, 0x63, 0x6f, 0x6e, 0x73, 0x6f,
0x6c, 0x65, 0x06, 0x6c, 0x6f, 0x67, 0x16, 0x48,
0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57, 0x6f, 0x72,
0x6c, 0x64, 0x22, 0x65, 0x78, 0x61, 0x6d, 0x70,
0x6c, 0x65, 0x73, 0x2f, 0x68, 0x65, 0x6c, 0x6c,
0x6f, 0x2e, 0x6a, 0x73, 0x0e, 0x00, 0x06, 0x00,
0xa0, 0x01, 0x00, 0x01, 0x00, 0x03, 0x00, 0x00,
0x14, 0x01, 0xa2, 0x01, 0x00, 0x00, 0x00, 0x38,
0xe1, 0x00, 0x00, 0x00, 0x42, 0xe2, 0x00, 0x00,
0x00, 0x04, 0xe3, 0x00, 0x00, 0x00, 0x24, 0x01,
0x00, 0xcd, 0x28, 0xc8, 0x03, 0x01, 0x00,
};
/* Create a raw context on `rt` and add the base-object intrinsics to it.
 * Returns the new context, or NULL if the raw context could not be created. */
static JSContext *JS_NewCustomContext(JSRuntime *rt)
{
    JSContext *new_ctx = JS_NewContextRaw(rt);

    if (new_ctx != NULL) {
        JS_AddIntrinsicBaseObjects(new_ctx);
    }
    return new_ctx;
}
#include <stdio.h> | |
#include "quickjs.h" | |
int main(int argc, char **argv) | |
{ | |
JSRuntime *rt; | |
JSContext *ctx; | |
rt = JS_NewRuntime(); | |
js_std_set_worker_new_context_func(JS_NewCustomContext); | |
js_std_init_handlers(rt); | |
ctx = JS_NewCustomContext(rt); | |
// --- instrument starts here | |
JSValue obj, val; | |
obj = JS_ReadObject(ctx, qjsc_hello, qjsc_hello_size, JS_READ_OBJ_BYTECODE); | |
if (JS_IsException(obj)) { | |
printf("Failed to read object!!\n"); | |
return 1; | |
} | |
// load_only is FALSE... | |
uint32_t tag; | |
tag = JS_VALUE_GET_TAG(obj); | |
if (tag == JS_TAG_MODULE) { | |
if (JS_ResolveModule(ctx, obj) < 0) { | |
printf("JS_ResolveModule() failed!!\n"); | |
return 1; | |
} | |
// TODO: js_module_set_import_meta | |
} | |
// quickjs.c: val = JS_EvalFunction(ctx, obj); | |
// -> JS_EvalFunctionInternal(ctx, obj, ctx->global_obj, NULL, NULL) | |
uint8_t* bcbuf = NULL; | |
int bclen = 0; | |
if (tag == JS_TAG_FUNCTION_BYTECODE) { | |
// obj = js_closure(ctx, obj, NULL, NULL) as JSFunctionBytecode | |
char* objptr = /* (JSFunctionBytecode*) */ JS_VALUE_GET_PTR(obj); | |
// -> JS_CallFree(...) | |
// -> JS_CallInternal(...) | |
// FIXME: offsets may vary | |
bcbuf = *(uint8_t**)(objptr + 32); | |
bclen = *(int*)(objptr + 40); | |
} else if (tag == JS_TAG_MODULE) { | |
// assume not a C module | |
// m = JS_VALUE_GET_PTR(obj) as JSModuleDef | |
JSModuleDef* m = /* (JSModuleDef*) */ JS_VALUE_GET_PTR(obj); | |
printf("JSModuleDef ptr %p\n", m); | |
// XXX: is the func_obj what we want? | |
char* objptr = (char*)m + 208; | |
bcbuf = *(uint8_t**)(objptr + 32); | |
bclen = *(int*)(objptr + 40); | |
// -> js_create_module_function(ctx, m) | |
// -> js_create_module_bytecode_function(ctx, m) | |
// -> js_link_module(ctx, m) | |
// -> js_evaluate_module(ctx, m) | |
// -> JS_CallFree(ctx, m->func_obj, JS_UNDEFINED, 0, NULL) | |
} else { | |
printf("Tag is not bytecode (%d)!!\n", tag); | |
return 1; | |
} | |
printf("Dumping bytecode (len=%d)\n", bclen); | |
for (int i = 0; i < bclen; i++) { | |
printf("0x%02x,%c", bcbuf[i], i % 8 == 7 ? '\n' : ' '); | |
} | |
printf("\n"); | |
// --- instrument ends here | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Configuration macros defined before "quickjs-opcode.h" is included. */
#define SHORT_OPCODES 1
#define CONFIG_BIGNUM 1
/* Opcode numbering, built by including "quickjs-opcode.h" twice with
 * different DEF/def expansions (X-macro technique):
 *   - first pass:  DEF(...) entries become OP_<id> enumerators, def(...)
 *     entries expand to nothing; OP_COUNT follows the last DEF entry.
 *   - second pass: def(...) entries become OP_<id> enumerators, numbered
 *     from OP_TEMP_START, DEF(...) entries expand to nothing. */
enum OPCodeEnum {
#define FMT(f)
#define DEF(id, size, n_pop, n_push, f) OP_ ## id,
#define def(id, size, n_pop, n_push, f)
#include "quickjs-opcode.h"
#undef def
#undef DEF
#undef FMT
OP_COUNT, /* excluding temporary opcodes */
/* temporary opcodes : overlap with the short opcodes */
OP_TEMP_START = OP_nop + 1,
/* Placeholder set to OP_TEMP_START - 1 so that the next enumerator (the
 * first def entry) receives the value OP_TEMP_START. */
OP___dummy = OP_TEMP_START - 1,
#define FMT(f)
#define DEF(id, size, n_pop, n_push, f)
#define def(id, size, n_pop, n_push, f) OP_ ## id,
#include "quickjs-opcode.h"
#undef def
#undef DEF
#undef FMT
OP_TEMP_END,
};
#include <stdio.h> | |
/* Print one row per opcode in the form
 *     <hex value> = <name> <size> <n_pop> <n_push> <format>
 * first for every DEF(...) entry of "quickjs-opcode.h", then for every
 * def(...) entry, and finally the value of OP_COUNT. */
int main(void)
{
/* Row layout shared by both passes over the opcode list. */
#define OP_ROW_FMT "%02x = %-32s %d %d %d %s\n"

    /* Pass 1: DEF entries only. */
#define FMT(f)
#define DEF(id, size, n_pop, n_push, f) \
    printf(OP_ROW_FMT, OP_ ## id, #id, size, n_pop, n_push, #f);
#define def(id, size, n_pop, n_push, f)
#include "quickjs-opcode.h"
#undef def
#undef DEF
#undef FMT

    /* Pass 2: def entries only. */
#define FMT(f)
#define DEF(id, size, n_pop, n_push, f)
#define def(id, size, n_pop, n_push, f) \
    printf(OP_ROW_FMT, OP_ ## id, #id, size, n_pop, n_push, #f);
#include "quickjs-opcode.h"
#undef def
#undef DEF
#undef FMT

#undef OP_ROW_FMT

    printf("count=0x%02x\n", OP_COUNT);
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment