Skip to content

Instantly share code, notes, and snippets.

@RomanKharin
Last active August 29, 2015 14:23
Show Gist options
  • Save RomanKharin/96fdf4202d0e4b11fd5e to your computer and use it in GitHub Desktop.
Save RomanKharin/96fdf4202d0e4b11fd5e to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import sys
import struct
from enum import IntEnum
from collections import namedtuple
from decodeenums import I32, I32WithImm, RType, Type, VarTypes, Stmt, StmtWithImm, ExportFormat
# Function signature: a return type plus the list of argument types.
Signature = namedtuple("Signature", "ret args")

# Pairs a signature index with the index of the imported function using it.
FuncImportSignature = namedtuple(
    "FuncImportSignature", "sig_index func_imp_index"
)

# Function pointer table: one signature index and the list of its elements.
FuncPtrTable = namedtuple("FuncPtrTable", "sig_index elems")
class WasmBinary:
    """Decoder for the polyfill-prototype-1 packed ``wasm`` binary format.

    Call :meth:`decode_wasm` with a binary file object. The sections are
    read strictly in file order and the decoded tables are stored on the
    instance; progress is reported with ``print``.

    Attributes:
        sigs: list of ``Signature`` records.
        i32s, f32s, f64s: constant pools.
        func_names: names of imported functions.
        func_imp_sigs: list of ``FuncImportSignature`` records.
        global_types, global_vals: parallel lists describing globals; the
            value is ``0``/``0.0`` for zero-initialised globals and the
            import name (``str``) for imported ones.
        func_sigs: signature index of every declared function.
        func_ptr_tables: list of ``FuncPtrTable`` records.
        cur_local_type: types of the arguments and locals of the function
            whose body is currently being decoded.
        unpacked_size: value of the 32-bit size field from the header.
    """

    def __init__(self):
        self.sigs = []
        self.i32s = []
        self.f32s = []
        self.f64s = []
        self.func_names = []
        self.func_imp_sigs = []
        self.global_types = []
        self.global_vals = []
        self.func_sigs = []
        self.func_ptr_tables = []
        # Members of the C++ reference decoder that are not ported yet:
        # func_name_base_, func_ptr_table_name_base_, num_labels_, cur_ret_.
        # Initialised here so statement readers never hit a missing attribute.
        self.cur_local_type = []
        self.unpacked_size = -1

    def decode_wasm(self, f):
        """Decode a whole file from the binary file object *f*.

        Raises:
            Exception: if the file does not start with the ``b"wasm"`` magic
                or contains a statement/expression this decoder does not
                know.
        """
        # magic constant
        if f.read(4) != b"wasm":
            raise Exception("Not a wasm binary")
        # unpacked length, little-endian uint32
        self.unpacked_size = struct.unpack("<I", f.read(4))[0]
        self.read_constant_pool_section(f)
        print("Constants", self.i32s, self.f32s, self.f64s)
        self.read_signature_section(f)
        print(self.sigs)
        self.read_function_import_section(f)
        print(self.func_names)
        print(self.func_imp_sigs)
        self.read_global_section(f)
        print("Global", list(zip(self.global_types, self.global_vals)))
        self.read_function_declaration_section(f)
        print("Func sigs", self.func_sigs)
        self.read_function_pointer_tables(f)
        print("Ptr tables", self.func_ptr_tables)
        self.read_function_definition_section(f)
        self.read_export_section(f)

    def read_vlq32(self, f):
        """Read an unsigned variable-length quantity (7 bits per byte).

        Bytes are taken least-significant group first; a byte below 0x80
        terminates the value.
        See https://en.wikipedia.org/wiki/Variable-length_quantity
        """
        value = 0
        shift = 0
        while True:
            b = f.read(1)[0]
            value |= (b & 0x7f) << shift
            if b < 0x80:
                return value
            shift += 7

    def read_vlq32i(self, f):
        """Read a VLQ and reinterpret it as a signed 32-bit integer."""
        val = self.read_vlq32(f)
        if val < 0x80000000:
            return val
        return val - 0x100000000

    def read_str(self, f):
        """Read a zero-terminated UTF-8 string (terminator is consumed)."""
        raw = bytearray()
        while True:
            byte = f.read(1)[0]
            if byte == 0:
                break
            raw.append(byte)
        return raw.decode("UTF-8")

    def read_code(self, f):
        """Read one opcode byte.

        Returns:
            ``(True, code, None, None)`` when the top bit is clear (plain
            opcode), otherwise ``(False, code, op, imm)`` where *op* is the
            two bits below the top bit and *imm* the low five bits.
        """
        code = f.read(1)[0]
        if not (code & 0x80):
            return (True, code, None, None)
        op = (code >> 5) & 3
        imm = code & 31
        return (False, code, op, imm)

    def read_constant_pool_section(self, f):
        """Read the three constant pools (i32 as VLQ, f32, f64)."""
        num_i32s = self.read_vlq32(f)
        num_f32s = self.read_vlq32(f)
        num_f64s = self.read_vlq32(f)
        for _ in range(num_i32s):
            self.i32s.append(self.read_vlq32(f))
        for _ in range(num_f32s):
            self.f32s.append(struct.unpack("<f", f.read(4))[0])
        for _ in range(num_f64s):
            # Doubles belong in the f64 pool (they used to land in f32s).
            self.f64s.append(struct.unpack("<d", f.read(8))[0])

    def read_signature_section(self, f):
        """Read the signature table, replacing any previous ``self.sigs``."""
        self.sigs = []
        num_sigs = self.read_vlq32(f)
        for _ in range(num_sigs):
            ret = RType(f.read(1)[0])
            num_args = self.read_vlq32(f)
            args = [Type(f.read(1)[0]) for _ in range(num_args)]
            self.sigs.append(Signature(ret, args))

    def read_function_import_section(self, f):
        """Read imported function names and the signatures they are used with."""
        num_func_imps = self.read_vlq32(f)
        # Total number of import signatures; not needed for decoding but it
        # must be consumed to stay aligned with the stream.
        self.read_vlq32(f)
        for imp_index in range(num_func_imps):
            self.func_names.append(self.read_str(f))
            num_sigs = self.read_vlq32(f)
            for _ in range(num_sigs):
                sig = self.read_vlq32(f)
                self.func_imp_sigs.append(FuncImportSignature(sig, imp_index))

    def read_global_section(self, f):
        """Read global variables.

        Six counts come first (zero-initialised i32/f32/f64, then imported
        i32/f32/f64); only imported globals carry data, a zero-terminated
        import name each.
        """
        counts = [self.read_vlq32(f) for _ in range(6)]
        types = (Type.I32, Type.F32, Type.F64)
        zeros = (0, 0.0, 0.0)
        for tp, zero, count in zip(types, zeros, counts[:3]):
            for _ in range(count):
                self.global_types.append(tp)
                self.global_vals.append(zero)
        for tp, count in zip(types, counts[3:]):
            for _ in range(count):
                self.global_types.append(tp)
                self.global_vals.append(self.read_str(f))

    def read_function_declaration_section(self, f):
        """Read the signature index of every declared function."""
        num_funcs = self.read_vlq32(f)
        for _ in range(num_funcs):
            self.func_sigs.append(self.read_vlq32(f))

    def read_function_pointer_tables(self, f):
        """Read all function pointer tables."""
        num_func_ptr_tables = self.read_vlq32(f)
        for _ in range(num_func_ptr_tables):
            sig_index = self.read_vlq32(f)
            num_elems = self.read_vlq32(f)
            elems = [self.read_vlq32(f) for _ in range(num_elems)]
            self.func_ptr_tables.append(FuncPtrTable(sig_index, elems))

    def read_add_sub(self, f, op, tp):
        """Read both operands of a binary ``+``/``-``; return ``(op, lhs, rhs)``."""
        v1 = self.read_expr(f, tp)
        v2 = self.read_expr(f, tp)
        return (op, v1, v2)

    def read_comma(self, f, tp):
        """Read a comma expression; return ``(",", lhs, rhs)``.

        The left operand is preceded by its own return-type byte, the right
        operand has type *tp*.
        """
        tp1 = RType(f.read(1)[0])
        v1 = self.read_expr(f, tp1)
        v2 = self.read_expr(f, tp)
        # The original returned an undefined name ``op`` here (NameError).
        return (",", v1, v2)

    def read_stmt(self, f):
        """Read one statement; return ``(Stmt code, *payload)``.

        Raises:
            Exception: for statement kinds that are not implemented.
        """
        raw, code, stmt, imm = self.read_code(f)
        print("STMT", raw, code, stmt, imm, "stmt=", code & 0x7f)
        if raw:
            stcode = Stmt(code)
            if code == Stmt.SetLoc:
                ret = self.read_set_local(f)
            elif code == Stmt.SetGlo:
                ret = self.read_set_global(f)
            else:
                raise Exception("Unknown statement " + str(code))
        else:
            # Opcode with an embedded immediate: imm is the variable index.
            if stmt == StmtWithImm.SetLoc:
                stcode = Stmt.SetLoc
                ret = self.read_set_local(f, imm)
            elif stmt == StmtWithImm.SetGlo:
                stcode = Stmt.SetGlo
                ret = self.read_set_global(f, imm)
            else:
                raise Exception("Unknown statement with imm " + str(stmt))
        return (stcode,) + ret

    def read_stmt_list(self, f):
        """Read a VLQ count followed by that many statements."""
        num_stmts = self.read_vlq32(f)
        if not num_stmts:
            print("***No statements")
        return [self.read_stmt(f) for _ in range(num_stmts)]

    def read_set_local(self, f, loc = None):
        """Read an assignment to a local; return ``(index, expr)``.

        *loc* is the local index when it was embedded in the opcode,
        otherwise it is read from the stream as a VLQ.
        """
        if loc is None:
            loc = self.read_vlq32(f)
        # The expression encoding depends on the type of the target.
        tp = self.cur_local_type[loc]
        expr = self.read_expr(f, tp)
        return (loc, expr)

    def read_set_global(self, f, loc = None):
        """Read an assignment to a global; return ``(index, expr)``.

        *loc* is the global index when it was embedded in the opcode,
        otherwise it is read from the stream as a VLQ.
        """
        if loc is None:
            loc = self.read_vlq32(f)
        tp = self.global_types[loc]
        expr = self.read_expr(f, tp)
        return (loc, expr)

    def read_expr(self, f, tp):
        """Read an expression of type *tp*; return ``(tp, *expr)``.

        Raises:
            Exception: for any type other than ``Type.I32`` (not implemented).
        """
        if tp == Type.I32:
            ret = self.read_expr_i32(f)
        else:
            raise Exception("Unknown type for expression " + str(tp))
        return (tp,) + ret

    def read_expr_i32(self, f):
        """Read an i32 expression; return ``(opcode, *operands)``.

        Raises:
            Exception: for i32 opcodes that are not implemented.
        """
        raw, code, expr, imm = self.read_code(f)
        print("EXPR", raw, code, expr, imm, "expr=", code & 0x7f)
        if raw:
            excode = I32(code)
            if code == I32.LitImm:
                ret = (self.read_vlq32i(f),)
            elif code == I32.Comma:
                ret = self.read_comma(f, tp = Type.I32)
            elif code == I32.Add:
                ret = self.read_add_sub(f, tp = Type.I32, op = "+")
            elif code == I32.Sub:
                ret = self.read_add_sub(f, tp = Type.I32, op = "-")
            else:
                raise Exception("Unknown expression I32 " + str(code))
        else:
            if expr == I32WithImm.LitImm:
                excode = I32WithImm.LitImm
                ret = (imm,)
            else:
                raise Exception("Unknown expression I32 with imm " + str(expr))
        return (excode,) + ret

    def read_function_definition_section(self, f):
        """Read and print the body of every declared function.

        For each function the local type list is rebuilt from the signature
        arguments followed by the declared i32, f32 and f64 locals, then the
        statement list is decoded and printed.
        """
        for i, sig_index in enumerate(self.func_sigs):
            # construct local arg types from sig
            sig = self.sigs[sig_index]
            self.cur_local_type = list(sig.args)
            # read vars types
            num_i32_vars = 0
            num_f32_vars = 0
            num_f64_vars = 0
            raw, code, op, imm = self.read_code(f)
            if raw:
                # code is a bitmask; each set flag is followed by a count
                if code & VarTypes.I32:
                    num_i32_vars = self.read_vlq32(f)
                if code & VarTypes.F32:
                    num_f32_vars = self.read_vlq32(f)
                if code & VarTypes.F64:
                    num_f64_vars = self.read_vlq32(f)
            else:
                # short form: the immediate is the number of i32 locals
                num_i32_vars = imm
            # construct local var types from code (ie num_*_vars)
            self.cur_local_type.extend([Type.I32] * num_i32_vars)
            self.cur_local_type.extend([Type.F32] * num_f32_vars)
            self.cur_local_type.extend([Type.F64] * num_f64_vars)
            print("Function f%d: %d, %d, %d" % (i, num_i32_vars,
                num_f32_vars, num_f64_vars))
            print(" ", sig)
            print(" local", self.cur_local_type)
            # TODO: save stmt
            stmts = self.read_stmt_list(f)
            print(" stmt", len(stmts))
            for stmt in stmts:
                print(stmt)

    def read_export_section(self, f):
        """Read and print the export section (default function or records)."""
        fmt = ExportFormat(f.read(1)[0])
        if fmt == ExportFormat.Default:
            funcnum = self.read_vlq32(f)
            print("Export default function #%d" % funcnum)
        elif fmt == ExportFormat.Record:
            elen = self.read_vlq32(f)
            for i in range(elen):
                func = self.read_str(f)
                fidx = self.read_vlq32(f)
                print("Export record #%d: \"%s\": #%d" % (i, func, fidx))
def main():
    """Decode the wasm file named by the first command-line argument.

    Prints the decoded sections and, if anything follows the export
    section, the number of bytes left unparsed. Exits with a usage message
    when no file name is given.
    """
    if len(sys.argv) < 2:
        sys.exit("Usage: %s <file.wasm>" % sys.argv[0])
    wb = WasmBinary()
    with open(sys.argv[1], "rb") as f:
        wb.decode_wasm(f)
        # Whatever remains after the last section was not understood.
        data = f.read()
    if data:
        print("Unparsed %d bytes" % len(data))


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Parse shared.h to create enums
import os
import sys
from enum import IntEnum
def parse_shared(f, fn):
    """Print Python ``IntEnum`` classes for every ``enum class`` found in *f*.

    Args:
        f: iterable of text lines (e.g. an open C++ header file).
        fn: name of the source, used in the generated header comment and in
            error messages.

    Enumerators may be implicit (previous value + 1), decimal or ``0x`` hex
    literals, or ``uint8_t(Other::Name)`` references to an enumerator of an
    enum that appeared earlier in the input.

    Raises:
        ValueError: if an explicit enumerator value is none of the above.
        KeyError: if a ``uint8_t(...)`` reference names an unknown enumerator.
    """
    print("# -*- coding: utf-8 -*-")
    print("# Autogenerated from \"%s\"" % fn)
    print()
    print("from enum import IntEnum")
    print()
    allarr = {}  # enum name -> {identifier: integer value}
    ename = None
    eidx = 0  # value an implicit enumerator would get next
    inenum = False
    for idx, line in enumerate(f, 1):
        line = line.strip()
        if not inenum:
            if line.startswith("enum class "):
                inenum = True
                eidx = 0
                # drop the keyword and any ": underlying_type" suffix
                ename = line[10:].split(":")[0].strip()
                print("# enumerate %s at line %d" % (ename, idx))
                print("class %s(IntEnum):" % ename)
                allarr[ename] = {}
            continue
        if line == "};":
            inenum = False
            # An empty class body needs a placeholder statement.
            if not allarr[ename]:
                print(" pass")
            print()
            continue
        if not line or line == "{":
            continue
        if line[-1:] == ",":
            line = line[:-1]
        if "=" in line:
            # has value
            lv = line.split("=")
            ident = lv[0].strip()
            value = lv[1].strip()
            if value.startswith("uint8_t("):
                # reference to an enumerator of a previously parsed enum
                value = value[8:]
                if value[-1:] == ")":
                    value = value[:-1]
                lp = value.split("::")
                ivalue = allarr[lp[0]][lp[1]]
                print(" # eq to %s" % value)
                value = "%d" % ivalue
            else:
                try:
                    if value[:2].lower() == "0x":
                        ivalue = int(value[2:], 16)
                    else:
                        ivalue = int(value, 10)
                except ValueError:
                    # Without a number the following implicit enumerators
                    # cannot be computed, so fail here with a location.
                    raise ValueError(
                        "%s:%d: cannot evaluate enumerator value %r"
                        % (fn, idx, value)) from None
        else:
            ident = line
            ivalue = eidx
            value = "%d" % ivalue
        print(" %s = %s" % (ident, value))
        allarr[ename][ident] = ivalue
        eidx = ivalue + 1
def main():
    """Generate enum definitions from the header named on the command line.

    The generated Python source is written to standard output. Exits with
    a usage message when no file name is given.
    """
    if len(sys.argv) < 2:
        sys.exit("Usage: %s <shared.h>" % sys.argv[0])
    path = sys.argv[1]
    with open(path, "r") as f:
        parse_shared(f, path)


if __name__ == "__main__":
    main()

Polyfill wasm file format

Note: this format is subject to change (expect heavy changes).

https://github.com/WebAssembly/polyfill-prototype-1

Данные основаны на утилите конвертации polyfill-prototype-1 asm.js<->wasm

Обозначения: u8 — 8-битное беззнаковое целое значение; int32 — 32-битное целое значение; uint32 — 32-битное беззнаковое целое значение; float — 32-битное дробное; double — 64-битное дробное; VLQ-32 — значение переменной длины (https://en.wikipedia.org/wiki/Variable-length_quantity), может иметь не более 32 бит.

  1. Магическая константа b'wasm'
  2. Размер распакованных данных, uint32
  3. Константы constant_pool_section
  4. Сигнатуры signature_section
  5. Импортируемые функции function_import_section
  6. Глобальные переменные global_section
  7. Декларированные функции function_declaration_section
  8. read_function_pointer_tables
  9. function_definition_section
  10. export_section

Константы constant_pool_section

  1. количество целочисленных констант int32 или uint32: num_i32s (тип VLQ-32)
  2. количество констант float: num_f32s (тип VLQ-32)
  3. количество констант double: num_f64s (тип VLQ-32)
  4. num_i32s записей VLQ-32
  5. num_f32s записей float
  6. num_f64s записей double

Сигнатуры signature_section

  1. количество сигнатур функций num_sigs (тип VLQ-32)
  2. num_sigs записей, каждая из которых состоит из:
  • тип возвращаемого значения
  • количество аргументов num_args (тип VLQ-32)
  • типы аргументов

Тип возвращаемого значения u8 (I32, F32, F64 или Void)

Тип аргумента: u8 (I32, F32 или F64)

Импортируемые функции function_import_section

  1. количество импортируемых функций num_func_imps (VLQ-32)
  2. количество типов сигнатур функций num_func_imp_sigs (VLQ-32)
  3. записи на каждую функцию
  • имя, оканчивающееся 0
  • количество сигнатур этой функции num_sigs (VLQ-32)
  • num_sigs записей, каждая имеет тип VLQ-32, указывает на номер сигнатуры

todo

Списки

см. https://github.com/WebAssembly/polyfill-prototype-1/blob/master/src/shared.h

// Minimal asm.js module; presumably the sample input for the asm.js<->wasm
// converter described above — TODO confirm.
function asmModule($a,$b,$c){'use asm';
// j refers to Math.fround taken from the first module argument.
var j=$a.Math.fround;
// $w has two parameters and three locals; its body only assigns to k.
function $w(k,l){
// asm.js-style parameter coercions: k|0 and +l.
k=k|0;l=+l;
// Locals initialised with 0, j(0) and 0. respectively.
var m=0,n=j(0),o=0.;
// Two assignments built from integer literals with + and -.
k=1+2+3|0;
k=-4-5-6|0;
}
// The module returns an empty object, i.e. nothing is exported.
return {};
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment