Skip to content

Instantly share code, notes, and snippets.

@liushuyu
Last active April 19, 2025 08:17
Show Gist options
  • Save liushuyu/cf7688d32b75f897011f6ec7ad644c6f to your computer and use it in GitHub Desktop.
Save liushuyu/cf7688d32b75f897011f6ec7ad644c6f to your computer and use it in GitHub Desktop.
import rzpipe as r2pipe
import json
import logging
import re
import sys
import binascii
import struct
import subprocess
from typing import Tuple
# We need something to narrow down the search scope: this format string is
# searched for in the target binary, and the function that references it is
# the one whose epilogue candidates are analysed.
SEARCH_STRING = "%s: out of memory to store relocation results for %s"
# Used by match_pattern() both as the number of instructions to step back from
# an epilogue candidate and as the number of instructions to disassemble.
SEARCH_WINDOW = 32 # Maximum number of instructions to analyze
def match_pattern(r: r2pipe.open_base.OpenBase, end_offset: int) -> Tuple[int, int]:
    """Scan the instructions preceding ``end_offset`` for the expected pattern.

    The pipe is seeked to ``end_offset``, moved back by ``SEARCH_WINDOW``
    instructions (``/o``), and ``SEARCH_WINDOW`` instructions are disassembled
    from there (``pdj``).  Within that window the function looks for:

    * an ``or byte [...], <imm>`` instruction -- the offset of the *last* one
      seen is remembered as the start address;
    * a ``cmp ..., 0`` instruction anywhere in the window (its position
      relative to the ``or`` is not checked).

    Args:
        r: An open R2/Rizin pipe.
        end_offset: Offset of the epilogue candidate to search backwards from.

    Returns:
        ``(start_addr, end_offset)``.  ``start_addr`` is ``-1`` when no
        ``or byte`` instruction was found or no ``cmp ..., 0`` was seen.

    Raises:
        Exception: if the backwards seek yields no address.
    """
    # `or byte [???], ?`
    start_pattern = re.compile(r"or\s+byte.+,\s+\d+")
    # `cmp ???, 0`
    # NOTE(review): the pattern is not anchored at the end, so it also matches
    # operands that merely start with 0 (e.g. `0x10`) -- confirm this is intended.
    cmpz_pattern = re.compile(r"cmp\s+.+,\s+0")

    r.cmd(f"s {end_offset}")  # seek to the end offset
    # Address located SEARCH_WINDOW instructions before the current seek.
    # Normalise the output so whitespace-only (or None) counts as "no result".
    rev_addr = (r.cmd(f"/o {SEARCH_WINDOW}") or "").strip()
    if not rev_addr:
        # Interpolate explicitly: passing the value as a second positional
        # argument to Exception() would leave the message unformatted.
        raise Exception(
            "can not seek backwards by %d instructions" % SEARCH_WINDOW
        )
    r.cmd(f"s {rev_addr}")  # seek to the start of the search window
    instructions = json.loads(r.cmd(f"pdj {SEARCH_WINDOW}"))

    start_addr = -1
    has_cmpz = False
    for insn in instructions:
        disasm = insn["disasm"]
        if start_pattern.match(disasm):
            # Keep overwriting: the last match in the window wins.
            start_addr = insn["offset"]
        elif cmpz_pattern.match(disasm):
            has_cmpz = True
    return (start_addr if has_cmpz else -1), end_offset
def generate_latx_c_definition(
    r: r2pipe.open_base.OpenBase, start_offset: int, end_offset: int
) -> str:
    """Generate the C ``ld_part_s`` initializer for the matched code range.

    The bytes between ``start_offset`` and ``end_offset`` are disassembled
    (``pDj``) and each instruction is mapped to a ``K_*`` macro name based on
    its first two bytes (big-endian); anything unknown becomes ``K_SKIP``.
    Each macro is emitted with the size of the *previous* instruction as its
    argument (``0`` for the first one).

    Args:
        r: An open R2/Rizin pipe.
        start_offset: Offset of the first instruction of the range.
        end_offset: Offset just past the range (start of the epilogue).

    Returns:
        The generated C source, formatted with ``clang-format`` when that tool
        is installed and succeeds, otherwise the raw single-line definition.
    """
    building_blocks = {
        0x4180: "K_OR",
        0x4883: "K_CMP_48",
        0x4983: "K_CMP_49",
        0x0F85: "K_JNE",
        0x488D: "K_LEA",
        0x498B: "K_MOV",
        0x4885: "K_TEST",
    }
    block = json.loads(r.cmd(f"s {start_offset}; pDj {end_offset - start_offset}"))
    last_size = 0
    offset = 0
    pieces = []
    for insn in block:
        instr = binascii.unhexlify(insn["bytes"])
        if len(instr) >= 2:
            opcode = struct.unpack(">H", instr[:2])[0]
            piece = building_blocks.get(opcode, "K_SKIP")
        else:
            # A one-byte instruction cannot match any two-byte key, and
            # struct.unpack(">H", ...) would raise on it.
            piece = "K_SKIP"
        pieces.append(f"{piece}({last_size})")
        last_size = insn["size"]
        offset += last_size
    # NOTE(review): 11 presumably is the byte length of the epilogue pattern
    # searched for by load_binary() -- confirm before changing either one.
    part_offset = offset + 11
    generated_code = "struct ld_part_s all_part[]={.ins={%s},.part_offset=%s};" % (
        ",".join(pieces),
        part_offset,
    )
    try:
        # format using clang-format if possible
        generated_code = subprocess.check_output(
            ["clang-format", "-"], input=generated_code.encode()
        ).decode()
    except (OSError, subprocess.CalledProcessError):
        # A missing executable raises FileNotFoundError (an OSError), not
        # CalledProcessError; in both cases fall back to the unformatted code.
        pass
    return generated_code
def load_binary(binary_path: str):
    """Analyse ``binary_path`` with R2/Rizin and print the generated C definition.

    Steps:
      1. Open the binary and run the ``aaa`` analysis.
      2. Locate ``SEARCH_STRING`` and the first instruction referencing it.
      3. Determine the bounds of the function containing that instruction.
      4. Search the function for the epilogue byte pattern and keep the
         candidates accepted by ``match_pattern``.
      5. Require exactly one candidate, generate the C definition for it and
         print it to stdout.

    Args:
        binary_path: Path of the binary to load.

    Raises:
        Exception: if the string, its referencing function, the epilogue, or a
            unique matching candidate cannot be found.
        AssertionError: if a JSON result from the tool is not a list.
    """
    logging.basicConfig(level=logging.INFO)
    logging.info(f"Loading binary into R2/Rizin: {binary_path}")
    # Load binary into memory
    r = r2pipe.open(binary_path)
    try:
        logging.info("Waiting for R2/Rizin to return the analysis results ...")
        r.cmd("aaa")

        string_loc = json.loads(r.cmd(f"/j {SEARCH_STRING}"))
        assert isinstance(string_loc, list)  # Ensure the search result is a list
        if not string_loc:
            raise Exception("Can not find the specified string in the binary")
        string_offset = string_loc[0]["offset"]

        r.cmd(f"s {string_offset}")
        # Cross-references to the search string
        ref_loc = json.loads(r.cmd("axtj"))
        assert isinstance(ref_loc, list)  # Ensure the xref result is a list
        if not ref_loc:
            raise Exception("Can not find the referenced function")
        ref_loc = ref_loc[0]["from"]
        logging.info(f"Found referenced instruction at offset: 0x{ref_loc:x}")

        r.cmd(f"s {ref_loc}")  # seek to the referenced instruction
        r.cmd("sf.")  # seek to the function start
        func_start: str = r.cmd("s").strip()
        func_data = json.loads(r.cmd("afij"))
        assert isinstance(func_data, list)  # Ensure the function info is a list
        if len(func_data) != 1:
            raise Exception("Function data should contain exactly one entry")
        func_size: int = func_data[0]["size"]
        logging.info(f"Found function start at {func_start}, size: {func_size}")

        logging.info("Searching for function end blocks ...")
        func_end = int(func_start, 16) + func_size
        # Restrict the hex search to the function body.
        epilogue_results = json.loads(
            r.cmd(
                f"e search.from={func_start}; e search.to=0x{func_end:x};/xj 5b415c415d415e415f5dc3"
            )
        )
        assert isinstance(epilogue_results, list)  # Ensure the search result is a list
        if not epilogue_results:
            raise Exception("Can not find the epilogue for the function")
        logging.info(f"Found {len(epilogue_results)} candidate(s) for the epilogue")

        candidates = []
        for ep in epilogue_results:
            result = match_pattern(r, ep["offset"])
            if result[0] != -1:  # -1 means the pattern was not found
                candidates.append(result)
        if not candidates:
            raise Exception("No matching epilogue candidates found")
        if len(candidates) > 1:
            raise Exception(
                "Multiple matching epilogue candidates found, this script can not handle this situation!"
            )

        start_offset, end_offset = candidates[0]
        logging.info(f"Found matching epilogue at offset: 0x{end_offset:x}")
        c_def = generate_latx_c_definition(r, start_offset, end_offset)
        logging.info("Success! Generated C code:")
        print(c_def)
    finally:
        # Always shut the R2/Rizin process down, including on errors.
        r.quit()
if __name__ == "__main__":
    # Exactly one positional argument is expected: the path to the binary.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python find_epilogue.py <<path/to/ld.so>>")
        sys.exit(1)
    (target_path,) = cli_args
    load_binary(target_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment