Last active
June 30, 2020 07:07
-
-
Save klemens-morgenstern/10ac36d6cb5f90bfb1e2afc6d5df02e6 to your computer and use it in GitHub Desktop.
Generate a location database using pyelftools
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Usage: python generate.py <binary>
import json
import os
import sys

from elftools.elf.elffile import ELFFile
from elftools.elf.sections import SymbolTableSection
def file_entry_to_abs(file_entry, linep):
    """Join a line-program file entry with the directory it refers to.

    Args:
        file_entry: Entry of the line program's file table. Its ``name``
            (bytes) and ``dir_index`` attributes are read.
        linep: Line program (or any mapping) that provides the ``version``
            and ``include_directory`` header fields.

    Returns:
        The directory joined with the decoded file name. Before DWARF 5 a
        directory index of 0 has no table entry, so the name is returned
        relative to ``.``.
    """
    name = file_entry.name.decode()
    dir_index = file_entry.dir_index
    directories = linep['include_directory']
    if linep['version'] >= 5:
        # DWARF 5 tables are 0-based; slot 0 holds the compilation directory.
        return os.path.join(directories[dir_index].decode(), name)
    if dir_index > 0:
        # Older tables are 1-based.
        return os.path.join(directories[dir_index - 1].decode(), name)
    return os.path.join('.', name)


def _file_entry_for(line_program, file_index):
    """Return the file-table entry a line-table row refers to."""
    files = line_program['file_entry']
    if line_program['version'] >= 5:
        return files[file_index]  # 0-based since DWARF 5
    return files[file_index - 1]  # 1-based before DWARF 5


def _read_symbols(elffile):
    """Return ``{'name', 'address'}`` dicts for the named symbols.

    This is essentially what ``nm`` does. When the binary holds several
    symbol table sections, the last one iterated wins.
    """
    symbols = []
    for section in elffile.iter_sections():
        if isinstance(section, SymbolTableSection):
            symbols = [
                {'name': sym.name, 'address': sym['st_value']}
                for sym in section.iter_symbols()
                if sym.name
            ]
    return symbols


def _index_line_rows(dwarf_info):
    """Map each address in ``.debug_line`` to ``(line_program, state)``.

    The debug info has one line program per compile unit. All of them are
    walked once, so looking up a mark does not re-parse them. The first row
    seen for an address is kept, except that an ``end_sequence`` row is
    replaced by a later ordinary row for the same address.
    """
    rows = {}
    for cu in dwarf_info.iter_CUs():
        line_program = dwarf_info.line_program_for_CU(cu)
        if line_program is None:
            # This compile unit does not point to a line program.
            continue
        for entry in line_program.get_entries():
            state = entry.state
            if state is None:
                # Entries without a state do not add a row to the table.
                continue
            known = rows.get(state.address)
            if known is None or (known[1].end_sequence
                                 and not state.end_sequence):
                rows[state.address] = (line_program, state)
    return rows


def main(argv=None):
    """Print the location database of an ELF binary as JSON on stdout.

    The output is ``{'main': <symbol>, 'marks': [<symbol>, ...]}``. Every
    mark (a symbol whose name starts with ``__metal_serial_``) additionally
    carries the ``filename`` and ``line`` found for its address.

    Args:
        argv: Argument vector; defaults to ``sys.argv``. ``argv[1]`` is the
            path of the binary.

    Raises:
        LookupError: If there is no ``main`` symbol, or a mark's address has
            no row in the line table.
        Exception: If the binary has no debug info.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit("usage: python generate.py <binary>")

    # The file must stay open while pyelftools reads from it.
    with open(argv[1], "rb") as binary:
        elffile = ELFFile(binary)
        symbols = _read_symbols(elffile)

        # We only need `main` and the marks, so filter the symbols.
        main_symbol = next(
            (sym for sym in symbols if sym['name'] == 'main'), None)
        if main_symbol is None:
            raise LookupError("No 'main' symbol found in the binary.")
        marks = [sym for sym in symbols
                 if sym['name'].startswith('__metal_serial_')]

        # Locations come from the debug symbols, so check they are present.
        if not elffile.has_dwarf_info():
            raise Exception("This tool needs debug info.")
        rows = _index_line_rows(elffile.get_dwarf_info())

        # Attach the file/line location to every mark.
        for mark in marks:
            found = rows.get(mark['address'])
            if found is None:
                raise LookupError(
                    "No line-table entry for mark {!r} at address {:#x}."
                    .format(mark['name'], mark['address']))
            line_program, state = found
            mark['filename'] = file_entry_to_abs(
                _file_entry_for(line_program, state.file), line_program)
            mark['line'] = state.line

    # We have main and the located marks, so dump them as JSON.
    print(json.dumps({'main': main_symbol, 'marks': marks}))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment