Last active
August 1, 2023 04:32
-
-
Save junyuecao/0c199f960b79233bd69e9f76b1772349 to your computer and use it in GitHub Desktop.
Add a name section to a stripped release wasm file using a symbols file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Usage : | |
python3 wasm-addname.py input.wasm -s input.js.symbols -o input.name.wasm -f |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: UTF-8 -*- | |
import argparse | |
import os | |
import os.path | |
import urllib.parse | |
import shutil | |
class Section:
    """Header of one WebAssembly section, read from a binary stream.

    Constructing a ``Section`` consumes the one-byte section id and the
    LEB128-encoded body size from ``f`` and leaves the stream positioned at
    the first byte of the section body.

    Attributes:
        id: Section id, or ``None`` when the stream was already at EOF.
        start_pos: Stream offset just after the id byte (i.e. where the
            size field begins).
        body_start_pos: Stream offset of the first body byte.
        body_size: Body length in bytes, as declared by the size field.
        end_pos: ``body_start_pos + body_size``.
        custom_section_type: Name of a custom section; left as ``None``
            here and filled in by the caller for custom sections.

    When ``id`` is ``None`` every other attribute is ``None`` as well.
    """

    def __init__(self, f):
        # Give every attribute a value up front so the EOF sentinel is a
        # fully formed object instead of one that raises AttributeError.
        self.id = None
        self.start_pos = None
        self.body_start_pos = None
        self.body_size = None
        self.end_pos = None
        self.custom_section_type = None  # only for custom sections

        section_id_byte = f.read(1)
        if not section_id_byte:
            # Reached EOF: there is no further section to describe.
            return

        # The first byte of a section is its id.
        self.start_pos = f.tell()
        self.id = int.from_bytes(section_id_byte, 'big')
        section_size, _ = read_leb128(f)
        self.body_start_pos = f.tell()
        self.body_size = section_size
        self.end_pos = self.body_start_pos + self.body_size
class Symbol:
    """A function index paired with the function's name."""

    def __init__(self, idx: int, name: str):
        # Plain value holder: both fields are stored exactly as given.
        self.idx, self.name = idx, name
# Section ids defined by the WebAssembly binary format, keyed by id.
_SECTION_NAMES = {
    0: 'custom',
    1: 'type',
    2: 'import',
    3: 'function',
    4: 'table',
    5: 'memory',
    6: 'global',
    7: 'export',
    8: 'start',
    9: 'element',
    10: 'code',
    11: 'data',
    12: 'data_count',
    13: 'tag',
}


def section_id2name(id: int):
    """Return the human-readable name of a WebAssembly section id.

    Args:
        id: Numeric section id as read from the module.

    Returns:
        The section name, or ``'error_id'`` when the id is not a known
        section id.
    """
    return _SECTION_NAMES.get(id, 'error_id')
def decode_leb128(b: bytearray) -> int:
    """Decode the bytes of an unsigned LEB128 integer.

    Each byte contributes its low seven bits; the byte at position ``p``
    is shifted left by ``7 * p``. The continuation bit is ignored, so all
    bytes of ``b`` are consumed. An empty input decodes to ``0``.
    """
    return sum((octet & 0x7f) << (7 * pos) for pos, octet in enumerate(b))
def read_leb128(r):
    """Read one unsigned LEB128 integer from a binary stream.

    Bytes are consumed one at a time until a byte without the continuation
    bit (0x80) is seen; the stream is left positioned right after it.

    Args:
        r: Binary file-like object whose ``read(1)`` returns ``bytes``.

    Returns:
        A ``(value, length)`` tuple: the decoded integer and the number of
        bytes that encoded it.

    Raises:
        EOFError: The stream ended before the integer was complete.
    """
    value = 0
    length = 0
    while True:
        chunk = r.read(1)
        if not chunk:
            # Previously this surfaced as an opaque TypeError from ord(b'').
            raise EOFError("unexpected end of input while reading a LEB128 integer")
        byte = chunk[0]
        # Each byte carries seven payload bits, least-significant group first.
        value |= (byte & 0x7f) << (7 * length)
        length += 1
        if (byte & 0x80) == 0:
            return value, length
def encode_leb128(i: int) -> bytearray:
    """Encode a non-negative integer as unsigned LEB128.

    Args:
        i: The value to encode; must be ``>= 0``.

    Returns:
        The encoded bytes, least-significant seven-bit group first, with
        the continuation bit (0x80) set on every byte except the last.

    Raises:
        ValueError: ``i`` is negative. This is an explicit check rather
            than an ``assert`` because asserts are stripped under ``-O``,
            and a negative value would then never shift down to zero.
    """
    if i < 0:
        raise ValueError("cannot encode a negative integer as unsigned LEB128: {}".format(i))
    encoded = bytearray()
    while True:
        low7 = i & 0x7f
        i >>= 7
        if i == 0:
            # Final group: continuation bit stays clear.
            encoded.append(low7)
            return encoded
        encoded.append(0x80 | low7)
def read_func_names(f):
    """Consume one name map from ``f``, printing only its entry count.

    The map is a LEB128 count followed by that many entries, each made of
    a LEB128 index, a LEB128 byte length and the UTF-8 name bytes. Entries
    are read and decoded but not kept; nothing is returned.
    """
    name_count, read_bytes = read_leb128(f)
    print("name_count={}, read_bytes={}".format(name_count, read_bytes))
    for _ in range(name_count):
        # Entry layout: [idx][name length][name bytes].
        read_leb128(f)
        name_length, _ = read_leb128(f)
        # Decoded (and discarded) so invalid UTF-8 still raises here.
        f.read(name_length).decode('utf-8')
def read_local_names(f):
    """Consume a list of (index, name map) pairs from ``f``.

    Reads a LEB128 count, then for each entry reads and prints a LEB128
    index and hands the following name map to ``read_func_names``.
    Nothing is returned.
    """
    name_assoc_count, read_bytes = read_leb128(f)
    print("name_assoc_count={}, read_bytes={}".format(name_assoc_count, read_bytes))
    for _ in range(name_assoc_count):
        idx, _ = read_leb128(f)
        print("idx={}".format(idx))
        # Each index is followed by its own name map.
        read_func_names(f)
def read_name_subsec(f, name_subsec_id):
    """Consume one subsection of a name section from ``f``.

    The subsection's LEB128 size is read and printed first. Subsection id
    1 (function names) is parsed with ``read_func_names``; any other id is
    skipped by seeking past its declared size.
    """
    subsec_size, read_bytes = read_leb128(f)
    print("names_subsec_size={}, read_bytes={} name_subsec_id={}".format(subsec_size, read_bytes, name_subsec_id))
    if name_subsec_id != 1:
        # Not the function-name subsection: jump over its body
        # (whence=1 means relative to the current position).
        f.seek(subsec_size, 1)
        return
    read_func_names(f)
def parse_input_wasm(input):
    """Walk the sections of a wasm file and return their headers.

    The file is opened read-only. The first 8 bytes (magic + version) are
    skipped without validation, then sections are read until EOF. A line
    describing each section is printed. For custom sections the section
    name is read and stored on the ``Section``; a custom section called
    ``name`` additionally has its subsections walked.

    Args:
        input: Path of the wasm file to inspect.

    Returns:
        A list of ``Section`` objects in file order.
    """
    print(input)
    sections = []
    # Parsing never writes, so plain "rb" is enough; "r+b" would fail on
    # read-only files.
    with open(input, "rb") as f:
        # The first 8 bytes are the wasm magic and version.
        f.seek(8)
        while True:
            # The first byte of every section is its section id.
            section = Section(f)
            if section.id is None:
                break
            sections.append(section)
            print("section_id = {}, name={}, section_size = {}, body_start_pos = {}"
                  .format(section.id, section_id2name(section.id), section.body_size, section.body_start_pos))
            if section.id != 0:
                # Not a custom section: skip straight to the next header.
                f.seek(section.end_pos)
                continue

            # Parse the custom section: it starts with a length-prefixed name.
            custom_name_size, _ = read_leb128(f)
            custom_type = f.read(custom_name_size).decode('utf-8')
            print("custom_type_name={} current_pos={}".format(custom_type, f.tell()))
            section.custom_section_type = custom_type
            if custom_type == 'name':
                while f.tell() < section.end_pos:
                    name_subsec_id = int.from_bytes(f.read(1), 'big')
                    read_name_subsec(f, name_subsec_id)
            else:
                # Seek to the recorded end instead of recomputing the
                # remaining length from the bytes consumed so far.
                f.seek(section.end_pos)
    return sections
def parse_symbols(symbols):
    """Parse a symbols file into a list of ``Symbol`` objects.

    Each non-blank line has the form ``<index>:<name>``, split at the
    first ``:``. In the name, every backslash is replaced by ``%`` and the
    result is percent-decoded, so an escape such as ``\\28`` becomes ``(``.
    Blank lines are skipped wherever they occur, rather than ending the
    parse at the first one.

    Args:
        symbols: Path of the UTF-8 encoded symbols file.

    Returns:
        The parsed ``Symbol`` objects in file order.

    Raises:
        ValueError: A non-blank line has no ``:`` separator, or its index
            part is not an integer.
    """
    result = []
    with open(symbols, 'r', encoding='utf-8') as f:
        for line_no, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line:
                continue
            idx_text, separator, escaped_name = line.partition(':')
            if not separator:
                # Without this check the line would be sliced around a
                # missing separator and yield a bogus index/name pair.
                raise ValueError(
                    "line {} of {!r} is not in 'index:name' form: {!r}".format(line_no, symbols, line))
            func_name = urllib.parse.unquote(escaped_name.replace("\\", "%"))
            result.append(Symbol(int(idx_text), func_name))
    print("Parsed symbols total count is {}".format(len(result)))
    return result
def assemble_name_assoc(idx:int, name:str):
    """Serialize one name association as ``[idx][name length][name]``.

    ``idx`` and the byte length of the UTF-8 encoded ``name`` are written
    as unsigned LEB128, followed by the encoded name itself. Returns a
    ``bytearray``.
    """
    encoded_name = name.encode('utf-8')
    return encode_leb128(idx) + encode_leb128(len(encoded_name)) + encoded_name
def assemble_name_section(symbols):
    """Build a complete custom ``name`` section holding function names.

    Layout of the returned bytes::

        [id=0][sec_size][\\x04name]
            [subsec_id=1][subsec_size][count]([idx][name_len][name])*count

    Entries are written in increasing index order, which is the order the
    name-section format expects for a name map; the sort is stable, so
    entries that share an index keep their input order.

    Args:
        symbols: Sequence of objects with ``idx`` and ``name`` attributes.

    Returns:
        A ``bytearray`` containing the whole section, header included.
    """
    ordered = sorted(symbols, key=lambda sym: sym.idx)

    # Name map: entry count followed by the entries themselves.
    name_map = encode_leb128(len(ordered))
    for sym in ordered:
        name_map += assemble_name_assoc(sym.idx, sym.name)

    # Subsection 1 (function names): id, size, then the name map.
    sub_sec = bytearray(b'\x01')
    sub_sec += encode_leb128(len(name_map))
    sub_sec += name_map

    # Custom section body: length-prefixed section name, then subsections.
    sec_body = bytearray(b'\x04name')
    sec_body += sub_sec

    # Section header: id 0 marks a custom section.
    ret = bytearray(b'\x00')
    ret += encode_leb128(len(sec_body))
    ret += sec_body
    return ret
if __name__ == "__main__":
    # Command-line entry point: parse the input wasm, refuse to continue if
    # it already has a name section, build a name section from the symbols
    # file, and append it to a copy of the input written to the output path.

    # sys.exit is used instead of the exit() helper, which is injected by
    # the site module and is not guaranteed to exist (e.g. under -S).
    import sys

    parser = argparse.ArgumentParser(description='Add a custom name section to release wasm with symbol file')
    parser.add_argument('input', action='store', help='Input wasm path')
    parser.add_argument('-s', '--symbols', dest='symbols', action='store',
                        help='Symbol file path')
    parser.add_argument('-o', '--output', dest='output', action='store',
                        help='Outputed named wasm file path')
    parser.add_argument('-f', '--force', dest='force', action='store_true',
                        help='Force overwrite target file')
    args = parser.parse_args()

    if args.output and os.path.exists(args.output) and not args.force:
        print("File exists, use -f to overwrite\n")
        parser.print_help()
        sys.exit(-1)

    sections = parse_input_wasm(args.input)
    for sec in sections:
        if sec.custom_section_type == 'name':
            print("A name section already exists\n")
            sys.exit(0)

    if not args.symbols:
        print("No symbol file,Please use -s to specify symbols file path\n")
        sys.exit(0)

    symbols = parse_symbols(args.symbols)
    name_section = assemble_name_section(symbols)

    # Nothing is written unless an output path was given.
    if args.output:
        # Copy the input verbatim, then append the new section at the end.
        shutil.copy2(args.input, args.output)
        with open(args.output, 'ab') as f:
            f.write(name_section)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment