Skip to content

Instantly share code, notes, and snippets.

@zcutlip
Created February 4, 2020 00:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zcutlip/107cf636983df68282fd1a8a3a85dc0b to your computer and use it in GitHub Desktop.
Save zcutlip/107cf636983df68282fd1a8a3a85dc0b to your computer and use it in GitHub Desktop.
Using py-object-file to Parse a Mach-O

Using py-object-file to Parse a Mach-O

First, instantiate a Mach object, passing it the path to a mach-o binary:

m = Mach("/usr/lib/libobjc.A.dylib")

The Mach class treats all mach-o binaries as if they're fat binaries with at least one slice. So to work with your mach-o you first have to get its slice, even if it's not a fat binary. You can do this either by architecture name or by slice index:

arch = m.get_architecture_slice("x86_64")
arch2 = m.get_architecture_slice_at_index(0)
assert arch == arch2

The architecture slice is your actual mach-o object. You can interact with it programmatically. For example, there's a convenience method to look up symbols by name.

symbol = arch.get_symbols_by_name("_objc_msgSend").pop()
print("{:016x}: {}".format(symbol.value, symbol.name))

Here's a complete example:

from pyobjfile.mach_o import Mach


class MachOParseException(Exception):
    """Error raised when a mach-o binary cannot be parsed or queried.

    The message becomes the regular exception message; the path of the
    binary involved is kept on the ``macho_bin_path`` attribute.
    """

    def __init__(self, msg, macho_bin_path):
        """Record *macho_bin_path* and initialise the exception with *msg*."""
        # Remember which binary the failure refers to.
        self.macho_bin_path = macho_bin_path
        super().__init__(msg)


class MachOArchitectureSlice:
    """Thin wrapper around one architecture slice of a mach-o binary.

    Any attribute that is not defined on the wrapper itself is looked up
    on the wrapped ``arch_slice`` object.
    """

    def __init__(self, arch_slice, macho_bin_path=None):
        """Wrap *arch_slice*.

        Args:
            arch_slice: The slice object to wrap. It must provide
                ``get_symbols_by_name()``.
            macho_bin_path: Path of the binary the slice belongs to. It is
                only used to annotate ``MachOParseException``; defaults to
                ``None`` when the caller does not know it.
        """
        self.arch_slice = arch_slice
        # Kept on the wrapper itself: locate_symbol() reports it in its
        # error, and without this the lookup would fall through
        # __getattr__ to the wrapped slice and fail there instead.
        self.macho_bin_path = macho_bin_path

    def locate_symbol(self, symbol_name):
        """Return the ``value`` of a symbol called *symbol_name*.

        The symbol used is whichever entry ``pop()`` yields from the result
        of ``arch_slice.get_symbols_by_name(symbol_name)``.

        Raises:
            MachOParseException: If the lookup produced no symbols.
        """
        try:
            sym = self.arch_slice.get_symbols_by_name(symbol_name).pop()
        except IndexError as err:
            # An empty result means the symbol is not in this slice.
            raise MachOParseException(
                "Symbol '{}' not found".format(symbol_name),
                self.macho_bin_path) from err

        return sym.value

    def __getattr__(self, attr):
        """Delegate attributes missing on the wrapper to the wrapped slice."""
        # __getattr__ only runs when normal lookup fails. If "arch_slice"
        # itself is missing (instance created without __init__, e.g. via
        # __new__ or while being copied), reading self.arch_slice below
        # would re-enter this method forever, so fail cleanly instead.
        if attr == "arch_slice":
            raise AttributeError(attr)
        return getattr(self.arch_slice, attr)


class MachOSymbolParse:
    """Open a mach-o binary, pick one architecture slice, and look up symbols.

    The slice is selected by architecture name when *arch* is given,
    otherwise by *arch_idx*.
    """
    # TODO: Maybe move this class into py-object-file?

    def __init__(self, macho_bin_path, arch=None, arch_idx=0):
        """Parse *macho_bin_path* and select an architecture slice.

        Args:
            macho_bin_path: Path to the mach-o binary.
            arch: Architecture name passed to ``get_architecture_slice()``.
                When truthy it takes precedence over *arch_idx*.
            arch_idx: Slice index passed to
                ``get_architecture_slice_at_index()`` when *arch* is not
                given.

        Raises:
            MachOParseException: If the file is not reported as valid, has
                no architectures, or the requested slice is not found.
        """
        self.macho_bin_path = macho_bin_path
        m = Mach(macho_bin_path)
        if not m.is_valid():
            raise MachOParseException(
                "Not a valid mach-o binary.", macho_bin_path)
        arch_count = m.get_num_archs()
        if arch_count < 1:
            raise MachOParseException(
                "No architectures found.", macho_bin_path)

        self.arch_slice = self._get_arch_slice(m, arch, arch_idx)

    def locate_symbol(self, symbol_name):
        """Return the location of *symbol_name* in the selected slice.

        Delegates to the wrapped slice's ``locate_symbol()``.
        """
        return self.arch_slice.locate_symbol(symbol_name)

    def _get_arch_slice(self, mach_o_obj, arch, arch_idx):
        """Return the requested slice wrapped in ``MachOArchitectureSlice``.

        Raises:
            MachOParseException: If the lookup returns a falsy result.
        """
        if arch:
            arch_slice = mach_o_obj.get_architecture_slice(arch)
            if not arch_slice:
                # MachOParseException requires the binary path as its second
                # argument; omitting it would raise TypeError here instead.
                raise MachOParseException(
                    "No architecture slice found for {}".format(arch),
                    self.macho_bin_path)
        else:
            arch_slice = mach_o_obj.get_architecture_slice_at_index(arch_idx)
            if not arch_slice:
                raise MachOParseException(
                    "No architecture slice found at index: {}".format(arch_idx),
                    self.macho_bin_path)

        wrapped = MachOArchitectureSlice(arch_slice)
        # The wrapper reports the binary path when a symbol lookup fails,
        # so make sure it has one.
        wrapped.macho_bin_path = self.macho_bin_path
        return wrapped


def main(argv):
    """Print the location of a symbol in a mach-o binary.

    Args:
        argv: Command-line style argument list:
            ``[program, macho_bin_path, symbol_name]``.

    Raises:
        SystemExit: With a usage message when fewer than two arguments
            follow the program name.
    """
    if len(argv) < 3:
        # Give a readable usage hint instead of a bare IndexError.
        prog = argv[0] if argv else "macho_symbol"
        raise SystemExit(
            "usage: {} <macho_bin_path> <symbol_name>".format(prog))

    macho_bin_path = argv[1]
    symbol_name = argv[2]

    parsed = MachOSymbolParse(macho_bin_path)
    symbol_loc = parsed.locate_symbol(symbol_name)
    print("{}: 0x{:016x}".format(symbol_name, symbol_loc))


# Script entry point: when run directly (not imported), hand the raw
# command-line arguments to main().
if __name__ == "__main__":
    import sys
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment