junyuecao/Add a name section to a stripped released wasm file with symbols file

## Add a name section to a stripped release wasm file using a symbols file
Usage:
python3 wasm-addname.py input.wasm -s input.js.symbols -o input.name.wasm -f

## wasm-addname.py
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-


import argparse
import os
import os.path
import urllib.parse
import shutil


class Section:
    """Header of one wasm section, read from the current position of a binary stream.

    Constructing an instance consumes the one-byte section id and the
    LEB128-encoded body size, leaving the stream positioned on the first
    byte of the section body.  If the stream is already exhausted, only
    ``id`` is set (to ``None``) and no other attribute exists.

    Attributes set when a header was read:
        id: numeric section id taken from the first byte.
        start_pos: stream offset right after the id byte.
        body_start_pos: stream offset of the first body byte.
        body_size: decoded length of the body in bytes.
        end_pos: ``body_start_pos + body_size``.
        custom_section_type: starts as ``None``; meant to be filled in by
            the caller for custom sections.
    """

    def __init__(self, f):
        id_byte = f.read(1)
        if not id_byte:
            # End of stream: signal it through id=None and stop here.
            self.id = None
            return

        # Note: taken after the id byte has been consumed, so this is the
        # offset of the size field rather than of the id byte itself.
        self.start_pos = f.tell()
        self.id = id_byte[0]

        body_size, _size_width = read_leb128(f)
        body_start = f.tell()
        self.body_start_pos = body_start
        self.body_size = body_size
        self.end_pos = body_start + body_size
        self.custom_section_type = None


class Symbol:
    """Pairs an integer index (``idx``) with the name (``name``) assigned to it."""

    def __init__(self, idx: int, name: str):
        self.idx, self.name = idx, name

# Section id -> human-readable label used in diagnostic output.
_SECTION_NAMES_BY_ID = {
    0: 'custom',
    1: 'type',
    2: 'import',
    3: 'function',
    4: 'table',
    5: 'memory',
    6: 'global',
    7: 'export',
    8: 'start',
    9: 'element',
    10: 'code',
    11: 'data',
    12: 'data_count',
}


def section_id2name(id: int):
    """Return the label for a section id, or ``'error_id'`` for ids outside 0..12."""
    return _SECTION_NAMES_BY_ID.get(id, 'error_id')

def decode_leb128(b: bytearray) -> int:
    """Decode a sequence of unsigned LEB128 bytes into an integer.

    Each byte contributes its low 7 bits, least-significant group first;
    the high (continuation) bit of every byte is ignored.  An empty
    sequence decodes to 0.
    """
    return sum((byte & 0x7f) << (7 * position) for position, byte in enumerate(b))

def read_leb128(r):
    """Read one unsigned LEB128 integer from a binary stream.

    Bytes are consumed one at a time until a byte with a clear high bit
    (0x80) is seen; the stream is left positioned right after that byte.

    Args:
        r: object whose ``read(1)`` returns the next byte, or an empty
            value once the stream is exhausted.

    Returns:
        A ``(value, length)`` tuple: the decoded integer and the number of
        bytes that were consumed to decode it.

    Raises:
        EOFError: if the stream ends before the terminating byte is read
            (previously this surfaced as an opaque ``TypeError`` from
            ``ord()``).
    """
    value = 0
    length = 0
    while True:
        chunk = r.read(1)
        if not chunk:
            raise EOFError("unexpected end of stream inside a LEB128 integer")
        byte = ord(chunk)
        # Each byte carries 7 payload bits, least-significant group first.
        value |= (byte & 0x7f) << (7 * length)
        length += 1
        if (byte & 0x80) == 0:
            return value, length

def encode_leb128(i: int) -> bytearray:
    """Encode a non-negative integer as unsigned LEB128.

    Args:
        i: the value to encode; must be >= 0.

    Returns:
        A new ``bytearray`` holding the encoding: 7 payload bits per byte,
        least-significant group first, with the high bit set on every byte
        except the last.

    Raises:
        ValueError: if ``i`` is negative.  An explicit check is used rather
            than ``assert`` because asserts are removed under ``python -O``,
            and a negative value would then never reach 0 when shifted
            right, so the loop would not terminate.
    """
    if i < 0:
        raise ValueError("cannot LEB128-encode a negative value: {}".format(i))
    encoded = bytearray()
    while True:
        low_bits = i & 0x7f
        i >>= 7
        if i == 0:
            # Last group: continuation bit stays clear.
            encoded.append(low_bits)
            return encoded
        encoded.append(0x80 | low_bits)


def read_func_names(f):
    """Consume one name map from ``f`` and print how many entries it has.

    The map is read as a LEB128 entry count followed by that many records,
    each made of a LEB128 index, a LEB128 name length and that many bytes
    of UTF-8 name.  The decoded values are discarded; the call only
    advances the stream (and fails if a name is not valid UTF-8).
    """
    entry_count, count_width = read_leb128(f)
    print("name_count={}, read_bytes={}".format(entry_count, count_width))
    for _ in range(entry_count):
        read_leb128(f)  # entry index, not needed here
        name_length, _length_width = read_leb128(f)
        f.read(name_length).decode('utf-8')  # name text, decoded then dropped

def read_local_names(f):
    """Consume a list of (index, name map) groups from ``f``.

    Reads a LEB128 group count, then for every group a LEB128 index
    followed by a name map handled by ``read_func_names``.  The count and
    each group index are printed; nothing is returned.
    """
    group_count, count_width = read_leb128(f)
    print("name_assoc_count={}, read_bytes={}".format(group_count, count_width))

    remaining = group_count
    while remaining > 0:
        owner_idx, _idx_width = read_leb128(f)
        print("idx={}".format(owner_idx))
        # Each group carries its own name map.
        read_func_names(f)
        remaining -= 1


def read_name_subsec(f, name_subsec_id):
    """Consume one subsection of a name section, starting at its size field.

    The LEB128 payload size is read and printed together with the
    subsection id.  Subsection id 1 is walked entry by entry through
    ``read_func_names``; any other id is skipped by seeking past its
    payload.
    """
    payload_size, size_width = read_leb128(f)
    print("names_subsec_size={}, read_bytes={} name_subsec_id={}".format(payload_size, size_width, name_subsec_id))
    if name_subsec_id != 1:
        # Not the function-name subsection: jump over the payload
        # (whence=1 means relative to the current position).
        f.seek(payload_size, 1)
        return
    read_func_names(f)


def parse_input_wasm(input):
    """Walk the sections of a wasm file and return their headers.

    The path is printed, the 8-byte preamble is skipped without being
    validated, and one ``Section`` is created per section until the end of
    the file.  A summary line is printed for every section.  For custom
    sections (id 0) the section name is read and stored in
    ``custom_section_type``; a custom section named ``name`` is additionally
    walked subsection by subsection.

    Args:
        input: path of the wasm file to inspect.

    Returns:
        A list of ``Section`` objects in file order.
    """
    print(input)
    sections = []
    # The file is only read, so open it read-only; "r+b" needlessly
    # required write permission on the input.
    with open(input, "rb") as f:
        # Skip the 8-byte preamble (magic + version).
        f.seek(8)
        while True:
            # Each section starts with a one-byte section id.
            section = Section(f)
            if section.id is None:
                break
            sections.append(section)

            print("section_id = {}, name={}, section_size = {}, body_start_pos = {}"
                  .format(section.id, section_id2name(section.id), section.body_size, section.body_start_pos))
            if section.id == 0:
                # Custom section: the body begins with a length-prefixed name.
                (custom_name_size, read_bytes) = read_leb128(f)
                custom_type = f.read(custom_name_size).decode('utf-8')
                print("custom_type_name={} current_pos={}".format(custom_type, f.tell()))
                section.custom_section_type = custom_type
                if custom_type == 'name':
                    while f.tell() < section.end_pos:
                        name_subsec_id = int.from_bytes(f.read(1), 'big')
                        read_name_subsec(f, name_subsec_id)

            # Continue from the declared end of the section.  For well-formed
            # input this is where the relative skips used to land; seeking
            # absolutely also resynchronises if name-section parsing ran
            # past the end.
            f.seek(section.end_pos)
    return sections

def parse_symbols(symbols):
    """Load ``Symbol`` entries from a text symbols file.

    Every non-blank line is expected to look like ``<index>:<name>``.  The
    part after the first colon is unescaped by turning each backslash into
    ``%`` and percent-decoding the result; the part before it is parsed as
    an integer index.  The number of parsed symbols is printed.

    Blank lines are skipped.  (Previously the first blank line silently
    ended parsing, dropping every symbol after it.)

    Args:
        symbols: path of the UTF-8 encoded symbols file.

    Returns:
        A list of ``Symbol`` objects in file order.

    Raises:
        ValueError: if a line has no ``:`` separator or its index is not an
            integer.
    """
    result = []
    with open(symbols, 'r', encoding='utf-8') as f:
        for line_no, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line:
                continue

            idx_text, separator, escaped_name = line.partition(':')
            if not separator:
                # Without this check a colon-less line would be mis-split
                # into a bogus index and name.
                raise ValueError(
                    "{}:{}: expected '<index>:<name>', got {!r}".format(symbols, line_no, line))

            func_name = urllib.parse.unquote(escaped_name.replace("\\", "%"))
            result.append(Symbol(int(idx_text), func_name))
    print("Parsed symbols total count is {}".format(len(result)))
    return result

def assemble_name_assoc(idx:int, name:str):
    """Serialise one name-map entry.

    Layout: LEB128 ``idx``, LEB128 byte length of the UTF-8 encoded name,
    then the encoded name itself.  Returns the bytes as produced by
    extending the result of ``encode_leb128(idx)``.
    """
    entry = encode_leb128(idx)
    encoded_name = name.encode('utf-8')
    entry.extend(encode_leb128(len(encoded_name)))
    entry.extend(encoded_name)
    return entry

def assemble_name_section(symbols):
    """Build the bytes of a custom ``name`` section holding one subsection.

    Layout produced:
        [0x00][section size][0x04 "name"]
        [0x01][subsection size][entry count][entry]*n

    All sizes and the count are LEB128 encoded.  Entries are emitted in the
    order in which ``symbols`` yields them; no sorting or de-duplication is
    done here.

    Args:
        symbols: sized collection of objects with ``idx`` and ``name``.

    Returns:
        A ``bytearray`` with the complete section, id byte included.
    """
    # Serialised entries, back to back.
    entries = bytearray()
    for symbol in symbols:
        entries.extend(assemble_name_assoc(symbol.idx, symbol.name))
    name_map = encode_leb128(len(symbols)) + entries  # count comes first

    # Section payload: length-prefixed section name, then subsection id 1.
    payload = bytearray(b'\x04name')
    payload.extend(b'\x01')
    payload.extend(encode_leb128(len(name_map)))
    payload.extend(name_map)

    # Wrap the payload in a custom-section header (id 0 + size).
    section = bytearray(b'\x00')
    section.extend(encode_leb128(len(payload)))
    section.extend(payload)
    return section

if __name__ == "__main__":
    # Command-line entry point: parse the input wasm, refuse to continue if it
    # already has a name section, build a name section from the symbols file
    # and append it to a copy of the input.
    #
    # sys.exit is used instead of the bare exit() helper: exit() is injected
    # by the site module for interactive use and is not guaranteed to exist
    # (e.g. under ``python -S``).  Exit statuses are unchanged.
    import sys

    parser = argparse.ArgumentParser(description='Add a custom name section to release wasm with symbol file')
    parser.add_argument('input', action='store', help='Input wasm path')
    parser.add_argument('-s', '--symbols', dest='symbols', action='store',
                        help='Symbol file path')
    parser.add_argument('-o', '--output', dest='output', action='store',
                        help='Outputed named wasm file path')
    parser.add_argument('-f', '--force', dest='force', action='store_true',
                        help='Force overwrite target file')

    args = parser.parse_args()

    # Never clobber an existing output unless -f was given.
    if args.output and os.path.exists(args.output) and not args.force:
        print("File exists, use -f to overwrite\n")
        parser.print_help()
        sys.exit(-1)

    sections = parse_input_wasm(args.input)

    # Appending a second name section would be pointless; stop early.
    if any(sec.custom_section_type == 'name' for sec in sections):
        print("A name section already exists\n")
        sys.exit(0)

    if not args.symbols:
        print("No symbol file，Please use -s to specify symbols file path\n")
        sys.exit(0)

    symbols = parse_symbols(args.symbols)
    # Named so that it does not shadow the builtin ``bytes``.
    name_section = assemble_name_section(symbols)

    # Without -o nothing is written; the run only prints the parse output.
    if args.output:
        shutil.copy2(args.input, args.output)
        # The new section is appended after the last existing section.
        with open(args.output, 'ab') as f:
            f.write(name_section)
	Usage :
	python3 wasm-addname.py input.wasm -s input.js.symbols -o input.name.wasm -f
	#!/usr/bin/env python3
	# -*- coding: UTF-8 -*-


	import argparse
	import os
	import os.path
	import urllib.parse
	import shutil


	class Section:
	def __init__(self, f) :
	section_id_byte = f.read(1)
	if (section_id_byte == b''):
	# EOF了
	self.id = None
	return
	# section fist byte is section id

	self.start_pos = f.tell()
	self.id = int.from_bytes(section_id_byte, 'big')
	(section_size, read_bytes) = read_leb128(f)
	self.body_start_pos = f.tell()
	self.body_size = section_size
	self.end_pos = self.body_start_pos + self.body_size
	self.custom_section_type = None # only for custom sections



	class Symbol:
	def __init__(self, idx: int, name: str):
	self.idx = idx
	self.name = name

	def section_id2name(id: int):
	if id == 0:
	return 'custom'
	elif id == 1:
	return 'type'
	elif id == 2:
	return 'import'
	elif id == 3:
	return 'function'
	elif id == 4:
	return 'table'
	elif id == 5:
	return 'memory'
	elif id == 6:
	return 'global'
	elif id == 7:
	return 'export'
	elif id == 8:
	return 'start'
	elif id == 9:
	return 'element'
	elif id == 10:
	return 'code'
	elif id == 11:
	return 'data'
	elif id == 12:
	return 'data_count'
	else:
	return 'error_id'

	def decode_leb128(b: bytearray) -> int:
	r = 0
	for i, e in enumerate(b):
	r = r + ((e & 0x7f) << (i * 7))
	return r

	def read_leb128(r):
	a = bytearray()
	while True:
	b = ord(r.read(1))
	a.append(b)
	if (b & 0x80) == 0:
	break
	return decode_leb128(a), len(a)

	def encode_leb128(i: int) -> bytearray:
	assert i >= 0
	r = []
	while True:
	byte = i & 0x7f
	i = i >> 7
	if i == 0:
	r.append(byte)
	return bytearray(r)
	r.append(0x80 | byte)


	def read_func_names(f):
	# Read func names
	# read count
	(name_count, read_bytes) = read_leb128(f)
	print("name_count={}, read_bytes={}".format(name_count, read_bytes))
	for i in range(name_count):
	# read idx
	(idx, read_bytes) = read_leb128(f)
	# read name length
	(fun_name_size, read_bytes) = read_leb128(f)
	# read name
	fun_name = f.read(fun_name_size).decode('utf-8')
	# print("idx={}, fun_name={}".format(idx, fun_name))

	# Finish name section

	def read_local_names(f):
	# parse local names list
	# Read next number
	(name_assoc_count, read_bytes) = read_leb128(f)
	print("name_assoc_count={}, read_bytes={}".format(name_assoc_count, read_bytes))

	for i in range(name_assoc_count):
	# read idx
	(idx, read_bytes) = read_leb128(f)
	print("idx={}".format(idx))
	# read namemap
	read_func_names(f)
	pass


	def read_name_subsec(f, name_subsec_id):
	(subsec_size, read_bytes) = read_leb128(f)
	print("names_subsec_size={}, read_bytes={} name_subsec_id={}".format(subsec_size, read_bytes, name_subsec_id))
	if name_subsec_id == 1:
	# funcnames
	read_func_names(f)
	else:
	# bypass
	f.seek(subsec_size, 1)
	return



	def parse_input_wasm(input):
	print(input)
	sections = []
	with open(input, "r+b") as f:
	# The first 8bytes is wasm+version

	f.seek(8)
	while True:
	# 每个section第一个字节为section id
	section = Section(f)
	if section.id == None:
	break
	sections.append(section)

	print("section_id = {}, name={}, section_size = {}, body_start_pos = {}"
	.format(section.id, section_id2name(section.id), section.body_size, section.body_start_pos))
	# f.seek(section.body_size, 1);
	if section.id == 0:
	section_start = f.tell()
	# 解析Custom section
	(custom_name_size, read_bytes) = read_leb128(f)
	custom_type = f.read(custom_name_size).decode('utf-8')
	print("custom_type_name={} current_pos={}".format(custom_type, f.tell()))
	section.custom_section_type = custom_type
	if custom_type == 'name':
	while f.tell() < section.end_pos:
	name_subsec_id = int.from_bytes(f.read(1), 'big')
	read_name_subsec(f, name_subsec_id)

	else:
	f.seek(section.body_size - read_bytes - custom_name_size, 1)
	else:
	f.seek(section.body_size, 1);
	return sections

	def parse_symbols(symbols):
	result = []
	with open(symbols, 'r', encoding='utf-8') as f:
	count = 0
	while True:
	line = f.readline().strip()
	count += 1
	if not line:
	break
	if len(line) == 0:
	continue

	spliter = line.find(':')
	func = line[spliter + 1:].replace("\\", "%")

	func_name = urllib.parse.unquote(func)
	result.append(Symbol(int(line[0:spliter]), func_name))
	print("Parsed symbols total count is {}".format(len(result)))
	return result

	def assemble_name_assoc(idx:int, name:str):
	# [idx][name_bytes][name]
	ret = encode_leb128(idx)
	bytes = name.encode('utf-8')

	ret += encode_leb128(len(bytes))
	ret += bytes
	return ret

	def assemble_name_section(symbols):
	# struct
	# [id=0][sec_size][\x04name][[subsecid=01][sub_sec_size][count][[idx][name_bytes][name]]*n]
	sec_body = bytearray(b'\x04name') #name section

	sub_sec = bytearray(b'\x01') # subsecid=01
	sub_sec_body = bytearray()
	for sym in symbols:
	sub_sec_body += assemble_name_assoc(sym.idx, sym.name)
	sub_sec_body = encode_leb128(len(symbols)) + sub_sec_body # prepend count
	sub_sec += encode_leb128(len(sub_sec_body))
	sub_sec += sub_sec_body

	sec_body += sub_sec

	ret = bytearray(b'\x00') # custom section
	ret += encode_leb128(len(sec_body))
	ret += sec_body

	return ret

	if __name__ == "__main__":
	parser = argparse.ArgumentParser(description='Add a custom name section to release wasm with symbol file')
	parser.add_argument('input', action='store', help='Input wasm path')
	parser.add_argument('-s', '--symbols', dest='symbols', action='store',
	help='Symbol file path')
	parser.add_argument('-o', '--output', dest='output', action='store',
	help='Outputed named wasm file path')
	parser.add_argument('-f', '--force', dest='force', action='store_true',
	help='Force overwrite target file')

	args = parser.parse_args()

	if args.output and os.path.exists(args.output) and not args.force:
	print("File exists, use -f to overwrite\n")
	parser.print_help()
	exit(-1)

	sections = parse_input_wasm(args.input)

	for sec in sections:
	if sec.custom_section_type == 'name':
	print("A name section already exists\n")
	exit(0)

	if not args.symbols:
	print("No symbol file，Please use -s to specify symbols file path\n")
	exit(0)

	symbols = parse_symbols(args.symbols)
	bytes = assemble_name_section(symbols)
	# print(bytes)

	if args.output:
	shutil.copy2(args.input, args.output)
	with open(args.output, 'ab') as f:
	f.write(bytes)