Skip to content

Instantly share code, notes, and snippets.

@darkarnium
Created December 15, 2019 14:33
Show Gist options
  • Save darkarnium/4aa515d1a55bc7aae1d5d93aafcbfa7f to your computer and use it in GitHub Desktop.
Save darkarnium/4aa515d1a55bc7aae1d5d93aafcbfa7f to your computer and use it in GitHub Desktop.
IDA - Attempt to locate literal pools, and mark subsequent sections as code
#
# NOTE: Before running, set IDA's minimal string length to 2 characters.
# To do so, right-click any white space in the IDA Strings window, select
# 'Setup', enter '2' into the 'Minimal string length' field, and click 'OK'.
#
import time
import idautils
# Define the addresses to 'scan' for literal pools. Both values are passed to
# locate_literal_pools(), whose loop runs while the current address is
# strictly below the end address, so rom_scan_end itself is not scanned.
rom_scan_start = 0x8000000
rom_scan_end = 0x807FFFF
# Used to track addresses of strings known to IDA (as int). Maps each string
# address to its length; filled lazily by has_string_entry() and reset to an
# empty dict at the start of every pass of the main loop.
string_addrs = {}
# Used to track potential literal pool locations. Rebuilt on every pass of the
# main loop and consulted by should_mark_as_code().
literal_pools = []
def has_string_entry(addr):
    '''
    Check whether the provided address is being tracked by IDA as a string.

    In order to speed up subsequent lookups, the module-level string_addrs
    dict (address -> length) is filled from idautils.Strings() whenever it is
    found empty, and every later call is answered from that dict.

    Args:
        addr (int): The address to check.

    Returns:
        The length of the string as known by IDA, or None.
    '''
    # Lazily (re)build the cache. An empty dict is the only "not loaded yet"
    # marker, so a database without any strings is re-enumerated on each call.
    if not string_addrs:
        for s in idautils.Strings():
            string_addrs[s.ea] = s.length

    # dict.get() yields None for addresses IDA does not track as a string.
    return string_addrs.get(addr)
def locate_literal_pools(s_addr, e_addr):
    '''
    Scan an address range for candidate literal pool entries.

    An address is reported when it is not code, is flagged as data, unknown
    or tail, has the cross-reference flag set, and is NOT known to IDA as a
    string.

    Args:
        s_addr (int): The address to start scanning at.
        e_addr (int): The address to stop scanning at (not itself scanned).

    Returns:
        A list of potential literal pool addresses.
    '''
    # Bit tested to decide whether the address has cross-refs (FF_REF).
    ref_bit = 4096

    found = []
    cursor = s_addr
    while cursor < e_addr:
        item_flags = GetFlags(cursor)
        step = get_item_size(cursor)

        # Addresses marked as code are never candidates.
        if not isCode(item_flags):
            candidate_kind = (
                isData(item_flags)
                or isUnknown(item_flags)
                or isTail(item_flags)
            )
            # Referenced, non-code, and not a known string: may be a
            # literal pool.
            if (candidate_kind
                    and item_flags & ref_bit == ref_bit
                    and not has_string_entry(cursor)):
                found.append(cursor)

        # Every path advances by the size of the item at the cursor.
        cursor += step

    return found
def should_mark_as_code(addr):
    '''
    Performs some heuristics on an address to determine whether to mark it as
    code. This is intended for use with ARM binaries which contain literal
    pools, and mileage may vary.

    Args:
        addr (int): The address to analyse.

    Returns:
        Whether to mark the section as code or not (Boolean).
    '''
    # Skip addresses which are marked as being potential literal pools.
    if addr in literal_pools:
        return False

    # Skip addresses already marked as code.
    if isCode(GetFlags(addr)):
        return False

    # Skip addresses marked as strings.
    if has_string_entry(addr):
        return False

    # The operand text is parsed as a hexadecimal number. When it is empty or
    # not numeric (e.g. a symbolic name) the value cannot be classified, so
    # the address is conservatively left alone instead of letting the parse
    # error abort the whole run.
    try:
        value = int(print_operand(addr, 1), 16)
    except (TypeError, ValueError):
        return False

    # Skip NULLs and characters in the ASCII range.
    return value > 0x7E
# Driver: repeatedly locate literal pools and try to turn whatever follows
# each pool entry into code, until a full pass creates no new code.

# Number of completed passes; starts at zero so the summary reports the real
# count rather than one more than were actually run.
loop_count = 0
all_success = set()
all_failure = set()

while True:
    marked_success = []
    marked_failure = []

    # Flatten any cached strings each loop.
    string_addrs = {}

    # Determine potential literal pool locations each loop.
    print('[-] Attempting to locate literal pools')
    literal_pools = locate_literal_pools(rom_scan_start, rom_scan_end)

    print('[-] Attempting to mark sections following literal pools as code')
    for c_addr in literal_pools:
        # We always operate on the NEXT address, as we're trying to inspect
        # the address AFTER the literal pool entry.
        n_addr = c_addr + get_item_size(c_addr)

        if not should_mark_as_code(n_addr):
            continue

        # Attempt to mark as code, wait for AA to finish, and check if the
        # address is now marked as code or not.
        ida_auto.auto_make_code(n_addr)
        ida_auto.auto_wait()

        # If the address doesn't now have the FF_CODE flag after AA, then
        # it wasn't able to be processed as code.
        if isCode(GetFlags(n_addr)):
            marked_success.append(n_addr)
            all_success.add(n_addr)
            # An address that failed in an earlier pass but succeeds now must
            # not be reported as a failure in the summary.
            all_failure.discard(n_addr)
        else:
            marked_failure.append(n_addr)
            all_failure.add(n_addr)

    # Print results and loop - if required.
    print(
        '[+] {0} addresses successfully marked as code, {1} failed'.format(
            len(marked_success),
            len(marked_failure)
        )
    )
    loop_count += 1

    # ..aaand we're done once a pass produces no new code.
    if not marked_success:
        break

    # Loop if we successfully marked any new sections as code to ensure that
    # any newly analysed sections are processed.
    print('[-] Recursing to process new code sections')

# Summary and exit.
print(
    '[+] Recursed {0} times with {1} succesfully marked, {2} failed'.format(
        loop_count,
        len(all_success),
        len(all_failure)
    )
)

# Sort so the report is stable and easy to scan; sets have no defined order.
success_hex = ['0x{0:0x}'.format(addr) for addr in sorted(all_success)]
failure_hex = ['0x{0:0x}'.format(addr) for addr in sorted(all_failure)]
print(
    '[!] Failed addresses: {0}'.format(
        ', '.join(failure_hex)
    )
)
print(
    '[+] Success addresses: {0}'.format(
        ', '.join(success_hex)
    )
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment