Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save junyuecao/0c199f960b79233bd69e9f76b1772349 to your computer and use it in GitHub Desktop.
Save junyuecao/0c199f960b79233bd69e9f76b1772349 to your computer and use it in GitHub Desktop.
Add a name section to a stripped release wasm file using a symbols file.
Usage:
python3 wasm-addname.py input.wasm -s input.js.symbols -o input.name.wasm -f
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
import argparse
import os
import os.path
import urllib.parse
import shutil
class Section:
    """Header of one WebAssembly section, read from the current file position.

    The constructor consumes the one-byte section id and the LEB128-encoded
    body size, leaving ``f`` positioned at the first byte of the body.
    If no byte can be read (end of file), only ``id`` is set (to ``None``)
    and no other attribute is initialised.
    """

    def __init__(self, f):
        id_byte = f.read(1)
        if id_byte == b'':
            # End of file: signal it through a None id and stop here.
            self.id = None
            return

        # Offset just past the id byte, i.e. where the size field begins.
        self.start_pos = f.tell()
        self.id = int.from_bytes(id_byte, 'big')

        section_size, _ = read_leb128(f)
        body_start = f.tell()
        self.body_start_pos = body_start
        self.body_size = section_size
        self.end_pos = body_start + section_size
        # Only meaningful for custom sections; starts out unset.
        self.custom_section_type = None
class Symbol:
    """One symbol entry: an index paired with its name."""

    def __init__(self, idx: int, name: str):
        self.idx, self.name = idx, name
def section_id2name(id: int):
    """Return the section name for a numeric section ``id``.

    Ids 0 through 12 map to their names; any other value yields
    ``'error_id'``.
    """
    names_by_id = {
        0: 'custom',
        1: 'type',
        2: 'import',
        3: 'function',
        4: 'table',
        5: 'memory',
        6: 'global',
        7: 'export',
        8: 'start',
        9: 'element',
        10: 'code',
        11: 'data',
        12: 'data_count',
    }
    return names_by_id.get(id, 'error_id')
def decode_leb128(b: bytearray) -> int:
    """Decode an unsigned LEB128 byte sequence into an integer.

    Every byte contributes its low seven bits, least-significant group
    first; the continuation bit (0x80) is ignored. An empty sequence
    decodes to 0.
    """
    value = 0
    shift = 0
    for byte in b:
        # The 7-bit groups never overlap, so OR-ing equals adding them.
        value |= (byte & 0x7f) << shift
        shift += 7
    return value
def read_leb128(r):
    """Read one unsigned LEB128 integer from the stream ``r``.

    Bytes are consumed one at a time until a byte whose continuation bit
    (0x80) is clear has been read.

    Args:
        r: File-like object whose ``read(1)`` returns a single byte.

    Returns:
        A ``(value, length)`` tuple: the decoded integer and the number of
        bytes consumed from ``r``.

    Raises:
        EOFError: If the stream ends before the terminating byte is seen.
    """
    value = 0
    length = 0
    while True:
        chunk = r.read(1)
        if not chunk:
            # Without this check ord() would fail with an opaque TypeError.
            raise EOFError(
                "unexpected end of stream after {} byte(s) of a LEB128 value".format(length))
        byte = ord(chunk)
        # Low seven bits carry the payload, least-significant group first.
        value |= (byte & 0x7f) << (7 * length)
        length += 1
        if (byte & 0x80) == 0:
            return value, length
def encode_leb128(i: int) -> bytearray:
    """Encode a non-negative integer as unsigned LEB128.

    Args:
        i: The value to encode; must be >= 0.

    Returns:
        The encoding as a ``bytearray``, least-significant 7-bit group
        first, with the continuation bit (0x80) set on every byte except
        the last.

    Raises:
        ValueError: If ``i`` is negative.
    """
    # An explicit check instead of ``assert``: asserts vanish under ``-O``,
    # and a negative value would then never shift down to zero.
    if i < 0:
        raise ValueError("cannot encode negative value {} as unsigned LEB128".format(i))
    encoded = bytearray()
    while True:
        low7 = i & 0x7f
        i >>= 7
        if i == 0:
            encoded.append(low7)
            return encoded
        encoded.append(0x80 | low7)
def read_func_names(f):
    """Consume a name map from ``f`` and print its entry count.

    The map is an entry count followed, per entry, by an index, a name
    length and that many bytes of UTF-8 name. Entries are read and decoded
    but not kept; nothing is returned.
    """
    name_count, read_bytes = read_leb128(f)
    print("name_count={}, read_bytes={}".format(name_count, read_bytes))
    for _entry in range(name_count):
        read_leb128(f)  # entry index, not used
        name_length, _consumed = read_leb128(f)
        # Decoded only so that invalid UTF-8 still raises here.
        f.read(name_length).decode('utf-8')
def read_local_names(f):
    """Consume a list of (index, name map) associations from ``f``.

    Reads the association count, then for each association reads its index
    and hands the following name map to ``read_func_names``. The count and
    each index are printed; nothing is returned.
    """
    assoc_count, read_bytes = read_leb128(f)
    print("name_assoc_count={}, read_bytes={}".format(assoc_count, read_bytes))
    for _entry in range(assoc_count):
        owner_idx, _consumed = read_leb128(f)
        print("idx={}".format(owner_idx))
        # The name map that follows uses the same layout as function names.
        read_func_names(f)
def read_name_subsec(f, name_subsec_id):
    """Consume one subsection of the name section from ``f``.

    The subsection size is read and printed first. Subsection id 1 is
    parsed with ``read_func_names``; any other id is skipped by seeking
    past its body.
    """
    subsec_size, read_bytes = read_leb128(f)
    print("names_subsec_size={}, read_bytes={} name_subsec_id={}".format(subsec_size, read_bytes, name_subsec_id))
    if name_subsec_id != 1:
        # Not handled here: jump over the body (whence=1, relative seek).
        f.seek(subsec_size, 1)
        return
    read_func_names(f)
def parse_input_wasm(input):
    """Scan a wasm file and return the headers of all its sections.

    The first 8 bytes (magic and version) are skipped without validation.
    Each section header is then read in turn; for custom sections (id 0)
    the section's type name is read and stored on the ``Section``, and a
    ``name`` section has its subsections walked. Progress is printed.

    Args:
        input: Path of the wasm file to read.

    Returns:
        A list of ``Section`` objects in file order.
    """
    print(input)
    sections = []
    # Read-only access suffices; "r+b" needlessly demanded write permission.
    with open(input, "rb") as f:
        # The first 8 bytes are the wasm magic plus version.
        f.seek(8)
        while True:
            # The first byte of every section is its id.
            section = Section(f)
            if section.id is None:
                break
            sections.append(section)
            print("section_id = {}, name={}, section_size = {}, body_start_pos = {}"
                  .format(section.id, section_id2name(section.id), section.body_size, section.body_start_pos))
            if section.id == 0:
                # Custom section: the body starts with a length-prefixed name.
                (custom_name_size, _read_bytes) = read_leb128(f)
                custom_type = f.read(custom_name_size).decode('utf-8')
                print("custom_type_name={} current_pos={}".format(custom_type, f.tell()))
                section.custom_section_type = custom_type
                if custom_type == 'name':
                    while f.tell() < section.end_pos:
                        name_subsec_id = int.from_bytes(f.read(1), 'big')
                        read_name_subsec(f, name_subsec_id)
            # Continue at the next header no matter how much of the body
            # was consumed above.
            f.seek(section.end_pos)
    return sections
def parse_symbols(symbols):
    """Parse a symbols file into a list of ``Symbol`` objects.

    Each non-blank line has the form ``<index>:<name>``, split at the first
    colon. In the name, every backslash is turned into ``%`` and the result
    is percent-decoded, so ``\\28`` style escapes become the characters
    they stand for. Blank lines are skipped.

    Args:
        symbols: Path of the UTF-8 encoded symbols file.

    Returns:
        The parsed symbols, in file order.

    Raises:
        ValueError: If a non-blank line has no ``:`` separator or its index
            is not an integer.
    """
    result = []
    with open(symbols, 'r', encoding='utf-8') as f:
        # Iterating the file ends only at real EOF, so a blank line in the
        # middle no longer ends parsing early.
        for line_no, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line:
                continue
            idx_text, separator, escaped_name = line.partition(':')
            if not separator:
                raise ValueError(
                    "line {}: expected '<index>:<name>', got {!r}".format(line_no, line))
            func_name = urllib.parse.unquote(escaped_name.replace("\\", "%"))
            result.append(Symbol(int(idx_text), func_name))
    print("Parsed symbols total count is {}".format(len(result)))
    return result
def assemble_name_assoc(idx: int, name: str):
    """Build one name association: ``[idx][name byte length][name]``.

    The index and the byte length are LEB128-encoded; the name is encoded
    as UTF-8. Returns the concatenation as a ``bytearray``.
    """
    encoded_name = name.encode('utf-8')
    return encode_leb128(idx) + encode_leb128(len(encoded_name)) + encoded_name
def assemble_name_section(symbols):
    """Build a complete custom ``name`` section holding function names.

    Layout of the result::

        [id=0][sec_size][\\x04name]
            [subsec id=1][subsec_size][count]([idx][name_bytes][name])*

    Args:
        symbols: Iterable of objects with ``idx`` and ``name`` attributes.

    Returns:
        The encoded section as a ``bytearray``.
    """
    # The name-section format expects entries in increasing index order, so
    # sort rather than rely on the order of the input. The sort is stable
    # and leaves already-ordered input untouched.
    ordered = sorted(symbols, key=lambda sym: sym.idx)

    # Name map: entry count followed by the entries themselves.
    name_map = encode_leb128(len(ordered))
    for sym in ordered:
        name_map += assemble_name_assoc(sym.idx, sym.name)

    sec_body = bytearray(b'\x04name')  # length-prefixed section name
    sec_body += b'\x01'  # subsection id 1: function names
    sec_body += encode_leb128(len(name_map))
    sec_body += name_map

    section = bytearray(b'\x00')  # section id 0: custom section
    section += encode_leb128(len(sec_body))
    section += sec_body
    return section
if __name__ == "__main__":
    # Command-line entry point: parse the input wasm, stop if it already has
    # a name section, otherwise build one from the symbols file and append
    # it to a copy of the input written at the output path.
    parser = argparse.ArgumentParser(description='Add a custom name section to release wasm with symbol file')
    parser.add_argument('input', action='store', help='Input wasm path')
    parser.add_argument('-s', '--symbols', dest='symbols', action='store',
                        help='Symbol file path')
    parser.add_argument('-o', '--output', dest='output', action='store',
                        help='Outputed named wasm file path')
    parser.add_argument('-f', '--force', dest='force', action='store_true',
                        help='Force overwrite target file')
    args = parser.parse_args()

    # SystemExit is raised directly: the exit() helper is injected by the
    # site module for interactive use and is not guaranteed to exist.
    if args.output and os.path.exists(args.output) and not args.force:
        print("File exists, use -f to overwrite\n")
        parser.print_help()
        raise SystemExit(-1)

    sections = parse_input_wasm(args.input)
    if any(sec.custom_section_type == 'name' for sec in sections):
        print("A name section already exists\n")
        raise SystemExit(0)

    if not args.symbols:
        print("No symbol file,Please use -s to specify symbols file path\n")
        raise SystemExit(0)

    symbols = parse_symbols(args.symbols)
    name_section = assemble_name_section(symbols)

    # Without -o nothing is written; the section is built and discarded.
    if args.output:
        shutil.copy2(args.input, args.output)
        # Append the new section after the existing file contents.
        with open(args.output, 'ab') as f:
            f.write(name_section)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment