Skip to content

Instantly share code, notes, and snippets.

@rpw
Created October 22, 2020 14:24
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save rpw/2c4064712638bce602755a938991e5e9 to your computer and use it in GitHub Desktop.
Save rpw/2c4064712638bce602755a938991e5e9 to your computer and use it in GitHub Desktop.
Find base addresses of flat firmware binaries using differences between string references
# Reverse engineering tricks:
# Determine load addresses using differences between string references
# (c) 2017 Comsecuris UG
from idc import *
from idautils import *
from sets import Set
ATTEMPTS = 10
#############################################################################
def memory_dwords_as_sorted_list(start, end):
print("Gathering dwords in memory. This may take a while...")
values = []
for a in range(start, end, 4):
values += [ Dword(a) ]
values = list(Set(values))
values.sort()
return values
def compute_differences(values):
differences = [0] * (len(values)-1)
for i in range(len(values)-1):
differences[i] = values[i+1] - values[i]
return differences
# Knuth Morris Pratt algorithm to find all subsequences
# https://www.safaribooksonline.com/library/view/python-cookbook-2nd/0596007973/ch05s14.html
def KMP(text, pattern):
# ensure we can index into pattern, and also make a copy to protect
# against changes to 'pattern' while we're suspended by `yield'
pattern = list(pattern)
length = len(pattern)
# build the KMP "table of shift amounts" and name it 'shifts'
shifts = [1] * (length + 1)
shift = 1
for pos, pat in enumerate(pattern):
while shift <= pos and pat != pattern[pos-shift]:
shift += shifts[pos-shift]
shifts[pos+1] = shift
# perform the actual search
startPos = 0
matchLen = 0
for c in text:
while matchLen == length or matchLen >= 0 and pattern[matchLen] != c:
startPos += shifts[matchLen]
matchLen -= shifts[matchLen]
matchLen += 1
if matchLen == length: yield startPos
def identify_base_addresses(values, strs):
string_differences = compute_differences(strs)
differences = compute_differences(values)
for v in string_differences:
if v <= 0:
print("Invalid sequence of strings. Memory addresses have to be in strictly increasing order.")
return []
matches = KMP(differences, string_differences)
baseaddrs = []
for pos in matches:
baseaddrs += [(values[pos]-strs[0])]
return baseaddrs
def find_adjacent_strings(strs):
from random import randint
sample_nr = 6
for attempt in xrange(0, ATTEMPTS):
start_off = randint(0, len(strs) - sample_nr)
matches = []
print("Looking for adjacent strings (attempt %d at offset %d)" %(attempt + 1, start_off))
for i in xrange(start_off, len(strs)-1):
if len(matches) == sample_nr:
return matches
if strs[i][0] + strs[i][1] == strs[i+1][0]:
matches.append(strs[i][0])
else:
del matches[:]
print("No adjacent strings found...")
return []
#############################################################################
strings = []
found = False
ea = ScreenEA()
values = memory_dwords_as_sorted_list(SegStart(ea), SegEnd(ea))
for s in Strings():
strings.append((s.ea, s.length))
baseaddrs = []
for attempt in xrange(0, ATTEMPTS):
# lists of start addresses of strings that are consecutively placed in memory
stringaddrs = find_adjacent_strings(strings)
if stringaddrs == []: continue
print("using adjacent strings %s" %(["%x" %(s) for s in stringaddrs]))
baseaddrs = identify_base_addresses(values, stringaddrs)
for baseaddr in baseaddrs:
print "Identified potential base address for image: %08x" % baseaddr
found = True
if found: break
if len(baseaddrs) == 0:
print("NO potential base address found, you may try again")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment