devanlai/read-static-shell-commands.py

## read-static-shell-commands.py
#!/usr/bin/python
"""
Parse a Zephyr firmware ELF file to extract its statically registered
shell commands using the linker section data structure.

This script requires pyelftools, which can be installed using:

    pip install pyelftools

"""

import argparse
import struct
import sys

import elftools
import elftools.elf.elffile
import elftools.elf.sections

from collections import OrderedDict, namedtuple

def extract_symbol(sym_addr, sym_size, section_data, section_addr):
    """
    Extracts the `sym_size` bytes stored at address `sym_addr`, looking in
    the contents of `section_data`, assuming the data starts at `section_addr`

    Raises ValueError if `sym_size` is negative or if the requested range
    [sym_addr, sym_addr + sym_size) is not fully contained in the section.
    """

    offset = sym_addr - section_addr
    end = offset + sym_size
    # A negative size would otherwise slip through the bounds check and
    # silently produce an empty or truncated slice
    if sym_size < 0 or offset < 0 or end > len(section_data):
        raise ValueError("Symbol address 0x{:08X} out of bounds [0x{:08X}, 0x{:08X})".format(
            sym_addr, section_addr, section_addr + len(section_data)))
    return section_data[offset:end]

def read_string(ptr, lookup_func, max_len=1024, encoding="utf-8"):
    """
    Reads the NUL-terminated string stored at address `ptr`.

    `lookup_func(addr, size)` must return the `size` bytes stored at `addr`;
    it is called for one byte at a time until it returns a NUL byte.

    Returns the string decoded using `encoding`, or the raw bytes if
    `encoding` is None. The terminating NUL is not part of the result.

    Raises ValueError if more than `max_len` bytes precede the terminator
    (pass `max_len=None` to disable the limit). Exceptions raised by
    `lookup_func` or by the final decode propagate to the caller.
    """
    string_bytes = bytearray()

    b = lookup_func(ptr, 1)
    while b != b'\x00':
        string_bytes.append(b[0])
        # Check the limit before fetching another byte. The partial data is
        # deliberately not decoded here: doing so fails when `encoding` is
        # None or when the cut falls inside a multi-byte sequence, which
        # would hide the length error behind an unrelated exception.
        if max_len is not None and len(string_bytes) > max_len:
            raise ValueError("String would exceed maximum expected length")
        ptr += 1
        b = lookup_func(ptr, 1)

    if encoding is None:
        return bytes(string_bytes)
    return bytes(string_bytes).decode(encoding)

def flatten_command_tree(root_commands):
    """
    Flattens a tree of decoded shell commands into a list of
    `(command, depth)` pairs in depth-first pre-order.

    Each command is a tuple whose first element is its syntax string and
    whose third element is the list of its sub-command tuples. Commands are
    ordered alphabetically by syntax string at every nesting level, and each
    command is directly followed by its sub-commands at `depth + 1`.
    """
    def by_syntax(command):
        return command[0]

    command_list = []
    # The end of the list is the top of the stack, so siblings are pushed in
    # reverse order to have them popped (and listed) in ascending order
    stack = [(command, 0) for command in reversed(sorted(root_commands, key=by_syntax))]
    while stack:
        command, depth = stack.pop()
        command_list.append((command, depth))
        for sub_command in reversed(sorted(command[2], key=by_syntax)):
            stack.append((sub_command, depth + 1))
    return command_list


def format_command_line(command, depth, handler_name=None, handler_source=None):
    """
    Formats one line of the command tree listing.

    `command` is a decoded command tuple (syntax string first, help string
    second) and `depth` its nesting level; the line is indented by two
    spaces per level. When `handler_name` is known it is appended in
    brackets, together with the `(filename, line)` pair `handler_source`
    if that is known as well. A source location without a name is not shown.
    """
    line = "{:s}{:s} - {:s}".format("  "*depth, command[0], command[1])
    if handler_name is not None and handler_source is not None:
        line += " [{} from {}:{}]".format(handler_name, handler_source[0], handler_source[1])
    elif handler_name is not None:
        line += " [{}]".format(handler_name)
    return line


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Extract statically registered shell command info")
    parser.add_argument("elf_file",
                        type=argparse.FileType("rb"),
                        help="Firmware elf file to read")

    args = parser.parse_args()
    elf = elftools.elf.elffile.ELFFile(args.elf_file)

    def require_section(name):
        "Return the ELF section called `name`, exiting with an error if absent"
        section = elf.get_section_by_name(name)
        if section is None:
            sys.stderr.write("Failed to find section '{}'\n".format(name))
            sys.exit(1)
        return section

    # Read the symbol table so we can lookup symbols by name
    symtab_section = require_section(".symtab")

    def find_symbol(name):
        "Return the unique symbol called `name`, raising ValueError otherwise"
        try:
            # Unpacking fails with TypeError when the lookup returns None and
            # with ValueError when it does not return exactly one symbol
            [symbol] = symtab_section.get_symbol_by_name(name)
        except (TypeError, ValueError):
            raise ValueError("Symbol '{}' not found or not unique".format(name))
        return symbol

    # Read the iterable root shell command list section
    root_cmds_section = require_section("shell_root_cmds_sections")
    root_cmds_data = root_cmds_section.data()
    root_cmds_addr = root_cmds_section["sh_addr"]

    # Read the general rodata section for everything else
    rodata_section = require_section("rodata")
    rodata_data = rodata_section.data()
    rodata_addr = rodata_section["sh_addr"]

    def read_shell_root_cmd_bytes(ptr, size):
        "Retrieve the `size` bytes stored at `ptr`"
        return extract_symbol(ptr, size, root_cmds_data, root_cmds_addr)

    def read_rodata_bytes(ptr, size):
        "Retrieve the `size` bytes stored at `ptr`"
        return extract_symbol(ptr, size, rodata_data, rodata_addr)

    # Binary record layouts; both formats assume little-endian data with
    # 32-bit pointers (NOTE(review): confirm for non 32-bit targets).
    #   command entry: 1-byte is_dynamic flag, 3 padding bytes, 32-bit pointer
    #   static entry:  four 32-bit pointers (syntax, help, sub-commands,
    #                  handler), two 1-byte argument counts, 2 padding bytes
    shell_cmd_entry_struct = struct.Struct("<BxxxI")
    shell_static_entry_struct = struct.Struct("<IIIIBBxx")

    # Lookup the array of shell root commands
    try:
        shell_root_cmd_array_start_symbol = find_symbol("__shell_root_cmds_start")
        shell_root_cmd_array_end_symbol = find_symbol("__shell_root_cmds_end")
    except ValueError:
        sys.stderr.write('Failed to find shell root cmd array symbols\n')
        sys.exit(1)

    shell_root_cmd_array_start = shell_root_cmd_array_start_symbol["st_value"]
    shell_root_cmd_array_end = shell_root_cmd_array_end_symbol["st_value"]

    # Walk the array of root shell command entries, keeping the static ones
    static_command_pointers = []
    for ptr in range(shell_root_cmd_array_start, shell_root_cmd_array_end, shell_cmd_entry_struct.size):
        shell_cmd_entry_bytes = read_shell_root_cmd_bytes(ptr, shell_cmd_entry_struct.size)
        is_dynamic, entry_ptr = shell_cmd_entry_struct.unpack(shell_cmd_entry_bytes)
        if not is_dynamic:
            static_command_pointers.append(entry_ptr)

    # Recursively decode all commands, starting from the root commands
    # and traversing all reachable static sub commands
    command_registry = {}

    def decode_shell_static_entry(ptr):
        """
        Decode the static command entry stored at `ptr` in rodata.

        Returns a tuple (syntax string, help string, list of sub-command
        tuples, handler pointer, required arg count, optional arg count),
        or None if the entry is not readable from rodata or has a null
        syntax pointer. Results are memoized by address.
        """
        if ptr in command_registry:
            return command_registry[ptr]
        try:
            shell_static_entry_bytes = read_rodata_bytes(ptr, shell_static_entry_struct.size)
        except ValueError:
            # The entry does not lie (entirely) inside rodata
            return None
        fields = shell_static_entry_struct.unpack(shell_static_entry_bytes)
        syntax_ptr, help_ptr, subcmd_ptr, handler_func_ptr, num_req_args, num_opt_args = fields
        if syntax_ptr == 0:
            # A null syntax pointer is treated as the end of an entry array
            return None
        syntax_string = read_string(syntax_ptr, read_rodata_bytes)
        help_string = read_string(help_ptr, read_rodata_bytes) if help_ptr != 0 else ""
        sub_commands = []
        command_entry = (syntax_string, help_string, sub_commands, handler_func_ptr, num_req_args, num_opt_args)
        # Register the entry before descending so that a revisit of this
        # address during the recursion returns it instead of decoding again
        command_registry[ptr] = command_entry
        if subcmd_ptr != 0:
            shell_cmd_entry_bytes = read_rodata_bytes(subcmd_ptr, shell_cmd_entry_struct.size)
            is_dynamic, entry_ptr = shell_cmd_entry_struct.unpack(shell_cmd_entry_bytes)
            if not is_dynamic and entry_ptr != 0:
                # Sub-commands are stored back to back; keep decoding until
                # an entry cannot be decoded
                sub_command = decode_shell_static_entry(entry_ptr)
                while sub_command is not None:
                    sub_commands.append(sub_command)
                    entry_ptr += shell_static_entry_struct.size
                    sub_command = decode_shell_static_entry(entry_ptr)
        return command_entry

    # Root entries that cannot be decoded are skipped rather than listed
    root_commands = []
    for ptr in static_command_pointers:
        root_command = decode_shell_static_entry(ptr)
        if root_command is not None:
            root_commands.append(root_command)

    # Convert to a flattened command list with nesting information
    command_list = flatten_command_tree(root_commands)

    # Map each command handler to a symbol name if possible
    handlers_to_lookup = set()
    for (command, depth) in command_list:
        handler_func_ptr = command[3]
        if handler_func_ptr != 0:
            handlers_to_lookup.add(handler_func_ptr)

    handler_name_table = {}
    for symbol in symtab_section.iter_symbols():
        symbol_value = symbol["st_value"]
        if symbol_value in handlers_to_lookup:
            handler_name_table[symbol_value] = symbol.name

    # Look up the source file and line of each command handler if possible
    handler_source_table = {}
    if handlers_to_lookup and elf.has_dwarf_info():
        dwarfinfo = elf.get_dwarf_info()
        for CU in dwarfinfo.iter_CUs():
            # First, look at line programs to find the file/line for the address
            lineprog = dwarfinfo.line_program_for_CU(CU)
            if lineprog is None:
                # This CU carries no line number information
                continue
            # File numbers in line programs are 1-based before DWARF 5 and
            # 0-based from DWARF 5 onwards
            file_index_base = 1 if lineprog["version"] < 5 else 0
            file_entries = lineprog["file_entry"]
            prevstate = None
            for entry in lineprog.get_entries():
                # We're interested in those entries where a new state is assigned
                if entry.state is None:
                    continue
                # Looking for a range of addresses in two consecutive states that
                # contain the required address.
                if prevstate is not None:
                    for handler_func_ptr in handlers_to_lookup:
                        if prevstate.address <= handler_func_ptr < entry.state.address:
                            filename = file_entries[prevstate.file - file_index_base].name.decode("utf-8")
                            handler_source_table[handler_func_ptr] = (filename, prevstate.line)
                if entry.state.end_sequence:
                    # For the state with `end_sequence`, `address` means the address
                    # of the first byte after the target machine instruction
                    # sequence and other information is meaningless. We clear
                    # prevstate so that it's not used in the next iteration. Address
                    # info is used in the above comparison to see if we need to use
                    # the line information for the prevstate.
                    prevstate = None
                else:
                    prevstate = entry.state

    # Display command tree; handler pointer 0 is never in either table, so
    # commands without a handler are printed without handler details
    for (command, depth) in command_list:
        handler_func_ptr = command[3]
        print(format_command_line(command, depth,
                                  handler_name_table.get(handler_func_ptr),
                                  handler_source_table.get(handler_func_ptr)))

    sys.exit(0)
	#!/usr/bin/python
	"""
	Parse a Zephyr firmware ELF file to extract its statically registered
	shell commands using the linker section data structure.

	This script requires pyelftools, which can be installed using:

	pip install pyelftools

	"""

	import argparse
	import struct
	import sys

	import elftools
	import elftools.elf.elffile
	import elftools.elf.sections

	from collections import OrderedDict, namedtuple

	def extract_symbol(sym_addr, sym_size, section_data, section_addr):
	    """
	    Extracts the `sym_size` bytes stored at address `sym_addr`, looking in
	    the contents of `section_data`, assuming the data starts at `section_addr`

	    Raises ValueError if `sym_size` is negative or if the requested range
	    [sym_addr, sym_addr + sym_size) is not fully contained in the section.
	    """

	    offset = sym_addr - section_addr
	    end = offset + sym_size
	    # A negative size would otherwise slip through the bounds check and
	    # silently produce an empty or truncated slice
	    if sym_size < 0 or offset < 0 or end > len(section_data):
	        raise ValueError("Symbol address 0x{:08X} out of bounds [0x{:08X}, 0x{:08X})".format(
	            sym_addr, section_addr, section_addr + len(section_data)))
	    return section_data[offset:end]

	def read_string(ptr, lookup_func, max_len=1024, encoding="utf-8"):
	    """
	    Reads the NUL-terminated string stored at address `ptr`.

	    `lookup_func(addr, size)` must return the `size` bytes stored at `addr`;
	    it is called for one byte at a time until it returns a NUL byte.

	    Returns the string decoded using `encoding`, or the raw bytes if
	    `encoding` is None. The terminating NUL is not part of the result.

	    Raises ValueError if more than `max_len` bytes precede the terminator
	    (pass `max_len=None` to disable the limit). Exceptions raised by
	    `lookup_func` or by the final decode propagate to the caller.
	    """
	    string_bytes = bytearray()

	    b = lookup_func(ptr, 1)
	    while b != b'\x00':
	        string_bytes.append(b[0])
	        # Check the limit before fetching another byte. The partial data is
	        # deliberately not decoded here: doing so fails when `encoding` is
	        # None or when the cut falls inside a multi-byte sequence, which
	        # would hide the length error behind an unrelated exception.
	        if max_len is not None and len(string_bytes) > max_len:
	            raise ValueError("String would exceed maximum expected length")
	        ptr += 1
	        b = lookup_func(ptr, 1)

	    if encoding is None:
	        return bytes(string_bytes)
	    return bytes(string_bytes).decode(encoding)

	def flatten_command_tree(root_commands):
	    """
	    Flattens a tree of decoded shell commands into a list of
	    `(command, depth)` pairs in depth-first pre-order.

	    Each command is a tuple whose first element is its syntax string and
	    whose third element is the list of its sub-command tuples. Commands are
	    ordered alphabetically by syntax string at every nesting level, and each
	    command is directly followed by its sub-commands at `depth + 1`.
	    """
	    def by_syntax(command):
	        return command[0]

	    command_list = []
	    # The end of the list is the top of the stack, so siblings are pushed in
	    # reverse order to have them popped (and listed) in ascending order
	    stack = [(command, 0) for command in reversed(sorted(root_commands, key=by_syntax))]
	    while stack:
	        command, depth = stack.pop()
	        command_list.append((command, depth))
	        for sub_command in reversed(sorted(command[2], key=by_syntax)):
	            stack.append((sub_command, depth + 1))
	    return command_list


	def format_command_line(command, depth, handler_name=None, handler_source=None):
	    """
	    Formats one line of the command tree listing.

	    `command` is a decoded command tuple (syntax string first, help string
	    second) and `depth` its nesting level; the line is indented by two
	    spaces per level. When `handler_name` is known it is appended in
	    brackets, together with the `(filename, line)` pair `handler_source`
	    if that is known as well. A source location without a name is not shown.
	    """
	    line = "{:s}{:s} - {:s}".format("  "*depth, command[0], command[1])
	    if handler_name is not None and handler_source is not None:
	        line += " [{} from {}:{}]".format(handler_name, handler_source[0], handler_source[1])
	    elif handler_name is not None:
	        line += " [{}]".format(handler_name)
	    return line


	if __name__ == "__main__":
	    parser = argparse.ArgumentParser(description="Extract statically registered shell command info")
	    parser.add_argument("elf_file",
	                        type=argparse.FileType("rb"),
	                        help="Firmware elf file to read")

	    args = parser.parse_args()
	    elf = elftools.elf.elffile.ELFFile(args.elf_file)

	    def require_section(name):
	        "Return the ELF section called `name`, exiting with an error if absent"
	        section = elf.get_section_by_name(name)
	        if section is None:
	            sys.stderr.write("Failed to find section '{}'\n".format(name))
	            sys.exit(1)
	        return section

	    # Read the symbol table so we can lookup symbols by name
	    symtab_section = require_section(".symtab")

	    def find_symbol(name):
	        "Return the unique symbol called `name`, raising ValueError otherwise"
	        try:
	            # Unpacking fails with TypeError when the lookup returns None and
	            # with ValueError when it does not return exactly one symbol
	            [symbol] = symtab_section.get_symbol_by_name(name)
	        except (TypeError, ValueError):
	            raise ValueError("Symbol '{}' not found or not unique".format(name))
	        return symbol

	    # Read the iterable root shell command list section
	    root_cmds_section = require_section("shell_root_cmds_sections")
	    root_cmds_data = root_cmds_section.data()
	    root_cmds_addr = root_cmds_section["sh_addr"]

	    # Read the general rodata section for everything else
	    rodata_section = require_section("rodata")
	    rodata_data = rodata_section.data()
	    rodata_addr = rodata_section["sh_addr"]

	    def read_shell_root_cmd_bytes(ptr, size):
	        "Retrieve the `size` bytes stored at `ptr`"
	        return extract_symbol(ptr, size, root_cmds_data, root_cmds_addr)

	    def read_rodata_bytes(ptr, size):
	        "Retrieve the `size` bytes stored at `ptr`"
	        return extract_symbol(ptr, size, rodata_data, rodata_addr)

	    # Binary record layouts; both formats assume little-endian data with
	    # 32-bit pointers (NOTE(review): confirm for non 32-bit targets).
	    #   command entry: 1-byte is_dynamic flag, 3 padding bytes, 32-bit pointer
	    #   static entry:  four 32-bit pointers (syntax, help, sub-commands,
	    #                  handler), two 1-byte argument counts, 2 padding bytes
	    shell_cmd_entry_struct = struct.Struct("<BxxxI")
	    shell_static_entry_struct = struct.Struct("<IIIIBBxx")

	    # Lookup the array of shell root commands
	    try:
	        shell_root_cmd_array_start_symbol = find_symbol("__shell_root_cmds_start")
	        shell_root_cmd_array_end_symbol = find_symbol("__shell_root_cmds_end")
	    except ValueError:
	        sys.stderr.write('Failed to find shell root cmd array symbols\n')
	        sys.exit(1)

	    shell_root_cmd_array_start = shell_root_cmd_array_start_symbol["st_value"]
	    shell_root_cmd_array_end = shell_root_cmd_array_end_symbol["st_value"]

	    # Walk the array of root shell command entries, keeping the static ones
	    static_command_pointers = []
	    for ptr in range(shell_root_cmd_array_start, shell_root_cmd_array_end, shell_cmd_entry_struct.size):
	        shell_cmd_entry_bytes = read_shell_root_cmd_bytes(ptr, shell_cmd_entry_struct.size)
	        is_dynamic, entry_ptr = shell_cmd_entry_struct.unpack(shell_cmd_entry_bytes)
	        if not is_dynamic:
	            static_command_pointers.append(entry_ptr)

	    # Recursively decode all commands, starting from the root commands
	    # and traversing all reachable static sub commands
	    command_registry = {}

	    def decode_shell_static_entry(ptr):
	        """
	        Decode the static command entry stored at `ptr` in rodata.

	        Returns a tuple (syntax string, help string, list of sub-command
	        tuples, handler pointer, required arg count, optional arg count),
	        or None if the entry is not readable from rodata or has a null
	        syntax pointer. Results are memoized by address.
	        """
	        if ptr in command_registry:
	            return command_registry[ptr]
	        try:
	            shell_static_entry_bytes = read_rodata_bytes(ptr, shell_static_entry_struct.size)
	        except ValueError:
	            # The entry does not lie (entirely) inside rodata
	            return None
	        fields = shell_static_entry_struct.unpack(shell_static_entry_bytes)
	        syntax_ptr, help_ptr, subcmd_ptr, handler_func_ptr, num_req_args, num_opt_args = fields
	        if syntax_ptr == 0:
	            # A null syntax pointer is treated as the end of an entry array
	            return None
	        syntax_string = read_string(syntax_ptr, read_rodata_bytes)
	        help_string = read_string(help_ptr, read_rodata_bytes) if help_ptr != 0 else ""
	        sub_commands = []
	        command_entry = (syntax_string, help_string, sub_commands, handler_func_ptr, num_req_args, num_opt_args)
	        # Register the entry before descending so that a revisit of this
	        # address during the recursion returns it instead of decoding again
	        command_registry[ptr] = command_entry
	        if subcmd_ptr != 0:
	            shell_cmd_entry_bytes = read_rodata_bytes(subcmd_ptr, shell_cmd_entry_struct.size)
	            is_dynamic, entry_ptr = shell_cmd_entry_struct.unpack(shell_cmd_entry_bytes)
	            if not is_dynamic and entry_ptr != 0:
	                # Sub-commands are stored back to back; keep decoding until
	                # an entry cannot be decoded
	                sub_command = decode_shell_static_entry(entry_ptr)
	                while sub_command is not None:
	                    sub_commands.append(sub_command)
	                    entry_ptr += shell_static_entry_struct.size
	                    sub_command = decode_shell_static_entry(entry_ptr)
	        return command_entry

	    # Root entries that cannot be decoded are skipped rather than listed
	    root_commands = []
	    for ptr in static_command_pointers:
	        root_command = decode_shell_static_entry(ptr)
	        if root_command is not None:
	            root_commands.append(root_command)

	    # Convert to a flattened command list with nesting information
	    command_list = flatten_command_tree(root_commands)

	    # Map each command handler to a symbol name if possible
	    handlers_to_lookup = set()
	    for (command, depth) in command_list:
	        handler_func_ptr = command[3]
	        if handler_func_ptr != 0:
	            handlers_to_lookup.add(handler_func_ptr)

	    handler_name_table = {}
	    for symbol in symtab_section.iter_symbols():
	        symbol_value = symbol["st_value"]
	        if symbol_value in handlers_to_lookup:
	            handler_name_table[symbol_value] = symbol.name

	    # Look up the source file and line of each command handler if possible
	    handler_source_table = {}
	    if handlers_to_lookup and elf.has_dwarf_info():
	        dwarfinfo = elf.get_dwarf_info()
	        for CU in dwarfinfo.iter_CUs():
	            # First, look at line programs to find the file/line for the address
	            lineprog = dwarfinfo.line_program_for_CU(CU)
	            if lineprog is None:
	                # This CU carries no line number information
	                continue
	            # File numbers in line programs are 1-based before DWARF 5 and
	            # 0-based from DWARF 5 onwards
	            file_index_base = 1 if lineprog["version"] < 5 else 0
	            file_entries = lineprog["file_entry"]
	            prevstate = None
	            for entry in lineprog.get_entries():
	                # We're interested in those entries where a new state is assigned
	                if entry.state is None:
	                    continue
	                # Looking for a range of addresses in two consecutive states that
	                # contain the required address.
	                if prevstate is not None:
	                    for handler_func_ptr in handlers_to_lookup:
	                        if prevstate.address <= handler_func_ptr < entry.state.address:
	                            filename = file_entries[prevstate.file - file_index_base].name.decode("utf-8")
	                            handler_source_table[handler_func_ptr] = (filename, prevstate.line)
	                if entry.state.end_sequence:
	                    # For the state with `end_sequence`, `address` means the address
	                    # of the first byte after the target machine instruction
	                    # sequence and other information is meaningless. We clear
	                    # prevstate so that it's not used in the next iteration. Address
	                    # info is used in the above comparison to see if we need to use
	                    # the line information for the prevstate.
	                    prevstate = None
	                else:
	                    prevstate = entry.state

	    # Display command tree; handler pointer 0 is never in either table, so
	    # commands without a handler are printed without handler details
	    for (command, depth) in command_list:
	        handler_func_ptr = command[3]
	        print(format_command_line(command, depth,
	                                  handler_name_table.get(handler_func_ptr),
	                                  handler_source_table.get(handler_func_ptr)))

	    sys.exit(0)