Skip to content

Instantly share code, notes, and snippets.

@rpw
Created March 21, 2023 11:50
Show Gist options
  • Save rpw/b88fb40578781507d9edd2db1b22e925 to your computer and use it in GitHub Desktop.
Save rpw/b88fb40578781507d9edd2db1b22e925 to your computer and use it in GitHub Desktop.
# reverse engineering tricks: Determine load addresses using string reference differentials
# (c) 2017-2023 Comsecuris GmbH
from idc import *
from idautils import *
from sets import Set
ATTEMPTS = 1
#############################################################################
def memory_dwords_as_sorted_list(start, end):
print("Gathering dwords in memory. This may take a while...")
values = []
for a in range(start, end, 4):
values += [ Dword(a) ]
values = list(Set(values))
values.sort()
return values
def compute_differences(values):
differences = [0] * (len(values)-1)
for i in range(len(values)-1):
differences[i] = values[i+1] - values[i]
return differences
# Knuth Morris Pratt algorithm to find all subsequences
# https://www.safaribooksonline.com/library/view/python-cookbook-2nd/0596007973/ch05s14.html
def KMP(text, pattern):
# ensure we can index into pattern, and also make a copy to protect
# against changes to 'pattern' while we're suspended by `yield'
pattern = list(pattern)
length = len(pattern)
# build the KMP "table of shift amounts" and name it 'shifts'
shifts = [1] * (length + 1)
shift = 1
for pos, pat in enumerate(pattern):
while shift <= pos and pat != pattern[pos-shift]:
shift += shifts[pos-shift]
shifts[pos+1] = shift
# perform the actual search
startPos = 0
matchLen = 0
for c in text:
while matchLen == length or matchLen >= 0 and pattern[matchLen] != c:
startPos += shifts[matchLen]
matchLen -= shifts[matchLen]
matchLen += 1
if matchLen == length: yield startPos
def identify_base_addresses(values, strs):
string_differences = compute_differences(strs)
differences = compute_differences(values)
for v in string_differences:
if v <= 0:
print("Invalid sequence of strings. Memory addresses have to be in strictly increasing order.")
return []
matches = KMP(differences, string_differences)
baseaddrs = []
for pos in matches:
baseaddrs += [(values[pos]-strs[0])]
return baseaddrs
def find_adjacent_strings(strs):
from random import randint
sample_nr = 6
for attempt in xrange(0, ATTEMPTS):
start_off = randint(0, len(strs) - sample_nr)
matches = []
print("Looking for adjacent strings (attempt %d at offset %d)" %(attempt + 1, start_off))
for i in xrange(start_off, len(strs)-1):
if len(matches) == sample_nr:
return matches
if strs[i][0] + strs[i][1] == strs[i+1][0]:
matches.append(strs[i][0])
else:
del matches[:]
print("No adjacent strings found...")
return []
#############################################################################
strings = []
found = False
ea = ScreenEA()
values = memory_dwords_as_sorted_list(SegStart(ea), SegEnd(ea))
for s in Strings():
strings.append((s.ea, s.length))
## lists of start addresses of strings that are consecutively placed in memory
stringaddrs = [0x1b594, 0x1b5b4, 0x1b5d4, 0x1b5f0, 0x1b60c, 0x1b628]
baseaddrs = []
for attempt in xrange(0, ATTEMPTS):
stringaddrs = find_adjacent_strings(strings)
if stringaddrs == []: continue
print("using adjacent strings %s" %(["%x" %(s) for s in stringaddrs]))
baseaddrs = identify_base_addresses(values, stringaddrs)
for baseaddr in baseaddrs:
print "Identified potential base address for image: %08x" % baseaddr
found = True
if found: break
if len(baseaddrs) == 0:
print("NO potential base address found, you may try again")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment