Last active
April 19, 2025 08:17
-
-
Save liushuyu/cf7688d32b75f897011f6ec7ad644c6f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import rzpipe as r2pipe | |
import json | |
import logging | |
import re | |
import sys | |
import binascii | |
import struct | |
import subprocess | |
from typing import Tuple | |
# Anchor string: searching for it narrows the analysis down to the code
# that references it.
SEARCH_STRING: str = "%s: out of memory to store relocation results for %s"

# Upper bound on the number of instructions analyzed per candidate.
SEARCH_WINDOW: int = 32
def match_pattern(r: "r2pipe.open_base.OpenBase", end_offset: int) -> Tuple[int, int]:
    """Scan the instructions preceding *end_offset* for the expected pattern.

    The pipe is seeked to ``end_offset``, asked (``/o``) for the address
    ``SEARCH_WINDOW`` instructions earlier, and ``SEARCH_WINDOW`` instructions
    are then disassembled from that address (``pdj``). Each disassembly string
    is matched against an ``or byte ..., N`` regex and a ``cmp ..., 0`` regex.

    Args:
        r: Open pipe; only its ``cmd`` method is used.
        end_offset: Offset where the window ends (the epilogue candidate).

    Returns:
        ``(start, end_offset)``. ``start`` is the offset of the last
        instruction in the window matching ``or byte ..., N``; it is ``-1``
        when no such instruction was seen or when no ``cmp ..., 0`` was seen.

    Raises:
        Exception: If the backwards seek does not yield an address.
    """
    # NOTE(review): both patterns are prefix matches (``re.match`` without an
    # end anchor), so e.g. ``cmp rax, 0x10`` also satisfies the ``cmp`` one.
    start_pattern = re.compile(r"or\s+byte.+,\s+\d+")  # `or byte [???], ?`
    cmpz_pattern = re.compile(r"cmp\s+.+,\s+0")  # `cmp ???, 0`

    r.cmd(f"s {end_offset}")  # seek to the end offset
    # Strip the reply so no trailing newline leaks into the next command.
    rev_addr = (r.cmd(f"/o {SEARCH_WINDOW}") or "").strip()
    if not rev_addr:
        raise Exception(f"can not seek backwards by {SEARCH_WINDOW} instructions")

    r.cmd(f"s {rev_addr}")  # seek to the start of the search window
    instructions = json.loads(r.cmd(f"pdj {SEARCH_WINDOW}"))

    start_addr = -1
    has_cmpz = False
    for insn in instructions:
        # Entries without a disassembly string simply never match.
        disasm = insn.get("disasm") or ""
        if start_pattern.match(disasm):
            start_addr = insn["offset"]
        elif cmpz_pattern.match(disasm):
            has_cmpz = True

    return (start_addr if has_cmpz else -1), end_offset
def generate_latx_c_definition(
    r: "r2pipe.open_base.OpenBase", start_offset: int, end_offset: int
) -> str:
    """Render the instructions in ``[start_offset, end_offset)`` as a C initializer.

    The byte range is disassembled with ``pDj``. For every instruction the
    first two bytes (read as a big-endian 16-bit value) select a ``K_*`` name
    from a fixed table; anything else, including one-byte instructions,
    becomes ``K_SKIP``. Each name is emitted with the byte size of the
    *previous* instruction as its argument (``0`` for the first one).

    Args:
        r: Open pipe; only its ``cmd`` method is used.
        start_offset: Offset of the first instruction to render.
        end_offset: Offset just past the last instruction to render.

    Returns:
        The generated ``struct ld_part_s all_part[]`` definition, run through
        ``clang-format`` when that tool can be executed successfully and
        returned unformatted otherwise.
    """
    building_blocks = {
        0x4180: "K_OR",
        0x4883: "K_CMP_48",
        0x4983: "K_CMP_49",
        0x0F85: "K_JNE",
        0x488D: "K_LEA",
        0x498B: "K_MOV",
        0x4885: "K_TEST",
    }
    # 11 equals the byte length of the epilogue byte pattern (22 hex digits)
    # that load_binary() searches for right after this block.
    epilogue_size = 11

    block = json.loads(r.cmd(f"s {start_offset}; pDj {end_offset - start_offset}"))

    pieces = []
    prev_size = 0
    total_size = 0
    for insn in block:
        raw = binascii.unhexlify(insn["bytes"])
        if len(raw) >= 2:
            opcode = struct.unpack(">H", raw[:2])[0]
            piece = building_blocks.get(opcode, "K_SKIP")
        else:
            # A one-byte instruction has no two-byte prefix to look up.
            piece = "K_SKIP"
        pieces.append(f"{piece}({prev_size})")
        prev_size = insn["size"]
        total_size += prev_size

    part_offset = total_size + epilogue_size
    generated_code = "struct ld_part_s all_part[]={.ins={%s},.part_offset=%s};" % (
        ",".join(pieces),
        part_offset,
    )

    try:
        # format using clang-format if possible
        generated_code = subprocess.check_output(
            ["clang-format", "-"], input=generated_code.encode()
        ).decode()
    except (OSError, subprocess.CalledProcessError):
        # OSError covers a missing/unexecutable clang-format binary;
        # CalledProcessError covers a failing run. Either way, keep the
        # unformatted code.
        pass
    return generated_code
def _cmd_json(r, command: str):
    """Run *command* on the pipe and decode its JSON reply ([] when empty)."""
    output = r.cmd(command)
    if not output or not output.strip():
        return []
    return json.loads(output)


def load_binary(binary_path: str):
    """Analyze *binary_path* and print the generated C definition.

    Steps, all driven through the pipe:

    1. Run the ``aaa`` analysis and search for ``SEARCH_STRING``.
    2. Take the first cross-reference to the first hit and seek to the start
       of the function containing it.
    3. Search that function's byte range for the epilogue byte pattern.
    4. Keep the epilogue hits accepted by ``match_pattern`` and require
       exactly one of them.
    5. Feed that hit to ``generate_latx_c_definition`` and print the result.

    Args:
        binary_path: Path of the binary to open.

    Raises:
        Exception: If the string, its reference, the function data or a
            unique matching epilogue cannot be found.
    """
    logging.basicConfig(level=logging.INFO)
    logging.info("Loading binary into R2/Rizin: %s", binary_path)
    # Load binary into memory
    r = r2pipe.open(binary_path)
    try:
        logging.info("Waiting for R2/Rizin to return the analysis results ...")
        r.cmd("aaa")

        string_loc = _cmd_json(r, f"/j {SEARCH_STRING}")
        assert isinstance(string_loc, list)  # Ensure the search result is a list
        if not string_loc:
            raise Exception("Can not find the specified string in the binary")
        string_offset = string_loc[0]["offset"]

        # Look up the code that references the string.
        r.cmd(f"s {string_offset}")
        xrefs = _cmd_json(r, "axtj")
        assert isinstance(xrefs, list)  # Ensure the reference result is a list
        if not xrefs:
            raise Exception("Can not find the referenced function")
        ref_loc = xrefs[0]["from"]
        logging.info("Found referenced instruction at offset: 0x%x", ref_loc)

        r.cmd(f"s {ref_loc}")  # seek to the referenced instruction
        r.cmd("sf.")  # seek to the function start
        func_start: str = r.cmd("s").strip()
        func_data = _cmd_json(r, "afij")
        assert isinstance(func_data, list)  # Ensure the function data is a list
        if len(func_data) != 1:
            raise Exception("Function data should contain exactly one entry")
        func_size: int = func_data[0]["size"]
        logging.info("Found function start at %s, size: %s", func_start, func_size)

        logging.info("Searching for function end blocks ...")
        func_end = int(func_start, 16) + func_size
        # Byte pattern: pop rbx; pop r12; pop r13; pop r14; pop r15; pop rbp; ret
        epilogue_results = _cmd_json(
            r,
            f"e search.from={func_start}; e search.to=0x{func_end:x};/xj 5b415c415d415e415f5dc3",
        )
        assert isinstance(epilogue_results, list)  # Ensure the search result is a list
        if not epilogue_results:
            raise Exception("Can not find the epilogue for the function")
        logging.info("Found %d candidate(s) for the epilogue", len(epilogue_results))

        # match_pattern() reports a rejected candidate with a start offset of -1.
        candidates = []
        for ep in epilogue_results:
            result = match_pattern(r, ep["offset"])
            if result[0] != -1:
                candidates.append(result)
        if not candidates:
            raise Exception("No matching epilogue candidates found")
        if len(candidates) > 1:
            raise Exception(
                "Multiple matching epilogue candidates found, this script can not handle this situation!"
            )

        start_offset, end_offset = candidates[0]
        logging.info("Found matching epilogue at offset: 0x%x", end_offset)
        c_def = generate_latx_c_definition(r, start_offset, end_offset)
    finally:
        # Always shut the pipe down so its backing process does not outlive us.
        r.quit()

    logging.info("Success! Generated C code:")
    print(c_def)
if __name__ == "__main__":
    # Exactly one command-line argument is expected: the binary to analyze.
    cli_args = sys.argv[1:]
    if len(cli_args) == 1:
        load_binary(cli_args[0])
    else:
        print("Usage: python find_epilogue.py <<path/to/ld.so>>")
        sys.exit(1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment