Skip to content

Instantly share code, notes, and snippets.

@8051Enthusiast
Last active April 26, 2023 23:20
Show Gist options
  • Save 8051Enthusiast/e384672cb0b4a779f668956282a06835 to your computer and use it in GitHub Desktop.
Save 8051Enthusiast/e384672cb0b4a779f668956282a06835 to your computer and use it in GitHub Desktop.
a script that tries to find a base address candidate for firmware by looking at string addresses and pointer target addresses
#!/usr/bin/env python3
import sys
import re
import argparse
import math
from typing import Literal
import numpy as np
def get_string_addresses(b: bytes, n: int) -> set[int]:
"""
Finds all utf8 strings of length at least n that are NUL-terminated and returns
the set of offsets
"""
# this regex also matches overlong encodings and surrogates
# this is fine because this is a heuristic anyway
string_regex = (rb'(?:[\n\r\t\f\x20-\x7E\n]'
rb'|[\xC2-\xDF][\x80-\xBF]'
rb'|[\xE0-\xEF][\x80-\xBF]{2}'
rb'|[\xF0-\xF4][\x80-\xBF]{3})'
+ f'{{{n},}}'.encode()
+ rb'\x00')
return {m.start() for m in re.finditer(string_regex, b)}
def get_pointed_addresses(b: bytes, endian: Literal['little', 'big'], length: int, align: int) -> set[int]:
"""
Interprets each offset in b as a pointer to an address in memory
and returns the set of all addresses that are pointed to.
"""
pointed_addresses = set()
for i in range(0, len(b) - length + 1, align):
offset_bytes = b[i:i+length]
offset = int.from_bytes(offset_bytes, byteorder=endian)
pointed_addresses.add(offset)
return pointed_addresses
def find_coprime_numbers(n: int, m: int) -> list[int]:
"""
Returns a list of integers greater than n, whose product is greater than m and
that are pairwise coprime.
"""
# only use odd numbers because with unaligned pointers there
# will be a lot of pointers that differ from each other by a
# multiple of 256, which interacts badly with the cross-correlation
i = n + 1 + (n % 2)
coprimes = [i]
product = i
i += 2
while product <= m:
if math.gcd(i, product) == 1:
coprimes.append(i)
product *= i
i += 2
return coprimes
def find_max_overlap(A: set[int], B: set[int], modulos: list[int]) -> int:
"""
Finds the probable offset between two sets of integers A and B, where the offset
is calculated by finding out the overlap modulo each modulus in the list.
The final offset is calculated using the Chinese remainder theorem.
"""
offsets = []
print(f'0/{len(modulos)}', file=sys.stderr, end='', flush=True)
for p in modulos:
a = np.zeros(p, dtype=np.float64)
b = np.zeros(p, dtype=np.float64)
for x in A:
a[x % p] += 1
for x in B:
# reverse B to turn cross-correlation into convolution
b[(p - x % p) % p] += 1
# circular convolution of size p (per convolution theorem)
corr = np.fft.ifft(np.fft.fft(a) * np.fft.fft(b)).real
offset = int(np.argmax(corr))
offsets.append(offset)
print(f'\r{len(offsets)}/{len(modulos)}', file=sys.stderr, end='', flush=True)
print(file=sys.stderr)
# chinese remainder theorem
M = math.prod(modulos)
k = sum(offset * (M // p) * pow(M // p, -1, p)
for p, offset in zip(modulos, offsets)) % M
return int(k)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("file", help="the file to analyze")
parser.add_argument("-n", help="the minimum length of strings to look for", type=int, default=5)
parser.add_argument("-l", help="the length of pointers to look for", type=int, default=8)
parser.add_argument("-e", help="the endianness of the pointers", choices=["little", "big"], default="little")
parser.add_argument("-a", help="the alignment of the pointers", type=int)
parser.add_argument("-f", help="slack factor (higher = slower but more accurate)", type=float)
args = parser.parse_args()
if args.a is None:
args.a = args.l
if args.f is None:
# make sure the ratio (number of pointers pointers/modulo) is at most 1/16 per default
# to make the noise floor reasonably low
args.f = min(1.0, 16.0 / args.a)
with open(args.file, "rb") as f:
b = f.read()
strings = get_string_addresses(b, args.n)
print(f'Found {len(strings)} strings', file=sys.stderr)
pointers = get_pointed_addresses(b, args.e, args.l, args.a)
modulos = find_coprime_numbers(int(len(b) * args.f), 2**(args.l*8) + len(b))
offset = find_max_overlap(pointers, strings, modulos)
negative_offset = math.prod(modulos) - offset
if negative_offset < len(b):
print(f"Offset: -{negative_offset:#x}")
elif offset < 2**(args.l*8):
print(f"Offset: {offset:#x}")
else:
print("Offset: not found")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment