# darkarnium/arm-literal-pool-hammer.py

## arm-literal-pool-hammer.py
#
# NOTE: Before running, set the minimal string length to 2 characters. This
#       can be done by right-clicking any white space in the IDA Strings
#       window, selecting 'Setup', entering '2' into the 'Minimal string
#       length' field, and clicking 'OK'.
#

import time
import idautils

# Address range to 'scan' for literal pools. The scan loop in
# locate_literal_pools() runs while the current address is below
# rom_scan_end, so the end address itself is not examined.
rom_scan_start = 0x8000000
rom_scan_end = 0x807FFFF

# Cache of strings known to IDA, mapping each string's address (int) to its
# length. Filled on demand by has_string_entry() and reset at the start of
# every pass of the main loop.
string_addrs = {}

# Potential literal pool locations found by the most recent scan; consulted
# by should_mark_as_code().
literal_pools = []


def has_string_entry(addr):
    '''
    Look up an address in the cache of strings known to IDA.

    The cache is the module-level ``string_addrs`` dict. Whenever it is found
    empty it is filled from ``idautils.Strings()``, so the string list is only
    enumerated when nothing is cached yet.

    Args:
        addr (int): The address to check.
    Returns:
        The length of the string IDA has recorded at that address, or None
        when no string is recorded there.
    '''
    # Fill the cache on demand, keyed by each string's address.
    if not string_addrs:
        for entry in idautils.Strings():
            string_addrs[entry.ea] = entry.length

    # dict.get() yields None for addresses that are not in the cache.
    return string_addrs.get(addr)


def locate_literal_pools(s_addr, e_addr):
    '''
    Collect the addresses in a range which look like literal pool entries.

    The range is walked item by item. An address is reported when it is not
    code, is flagged as data, unknown or tail, has the cross-reference flag
    set, and is NOT known to IDA as a string.

    Args:
        s_addr (int): The address to start scanning at.
        e_addr (int): The address to stop scanning at. Scanning ends as soon
            as the current address reaches this value.

    Returns:
        A list of potential literal pool addresses, in ascending order.
    '''
    # Bit tested for cross-references (the FF_REF flag).
    ref_flag = 4096
    candidates = []

    ea = s_addr
    while ea < e_addr:
        item_flags = GetFlags(ea)
        step = get_item_size(ea)

        # Code is never a literal pool; everything else is inspected further.
        if not isCode(item_flags):
            is_candidate_kind = (
                isData(item_flags)
                or isUnknown(item_flags)
                or isTail(item_flags)
            )
            if is_candidate_kind and item_flags & ref_flag == ref_flag:
                # Referenced non-code which IDA does not list as a string may
                # be a literal pool entry.
                if not has_string_entry(ea):
                    candidates.append(ea)

        # Always advance by the size of the item at the current address.
        ea += step

    return candidates


def should_mark_as_code(addr):
    '''
    Performs some heuristics on an address to determine whether to mark it as
    code. This is intended for use with ARM binaries which contain literal
    pools, and mileage may vary.

    An address is rejected when it is itself a potential literal pool entry,
    is already code, is known to IDA as a string, or when the text of its
    operand 1 parses as a hex value of 0x7E or below. An operand which cannot
    be parsed as a hex number is also rejected rather than raising.

    Args:
        addr (int): The address to analyse
    Results:
        Whether to mark the section as code or not (Boolean).
    '''
    # Skip addresses which are marked as being potential literal pools.
    if addr in literal_pools:
        return False

    # Skip addresses marked as code.
    if isCode(GetFlags(addr)):
        return False

    # Skip addresses marked as strings.
    if has_string_entry(addr):
        return False

    # Skip NULLs and characters in the ASCII range. The operand text may not
    # parse as a hex number (for example if it is empty); be conservative and
    # leave such addresses alone instead of aborting the whole run.
    try:
        value = int(print_operand(addr, 1), 16)
    except (TypeError, ValueError):
        return False

    return value > 0x7E


# Number of passes over the ROM; the first pass is counted up front and the
# counter only grows when another pass is actually started.
loop_count = 1
all_success = set()
all_failure = set()
while True:
    marked_success = []
    marked_failure = []

    # Flatten any cached strings each loop, so has_string_entry() rebuilds
    # its cache from IDA's current string list.
    string_addrs = {}

    # Determine potential literal pool locations each loop.
    print('[-] Attempting to locate literal pools')
    literal_pools = locate_literal_pools(rom_scan_start, rom_scan_end)

    print('[-] Attempting to mark sections following literal pools as code')
    for c_addr in literal_pools:
        # We always operate on the NEXT address, as we're trying to inspect the
        # address AFTER the literal pool entry.
        n_addr = c_addr + get_item_size(c_addr)

        if not should_mark_as_code(n_addr):
            continue

        # Attempt to mark as code and wait for AA to finish.
        ida_auto.auto_make_code(n_addr)
        ida_auto.auto_wait()

        # If the address doesn't now have the FF_CODE flag after AA, then
        # it wasn't able to be processed as code.
        if isCode(GetFlags(n_addr)):
            marked_success.append(n_addr)
            all_success.add(n_addr)
            # An address which failed on an earlier pass but succeeds now
            # must not be reported as a failure in the final summary.
            all_failure.discard(n_addr)
        else:
            marked_failure.append(n_addr)
            all_failure.add(n_addr)

    # Print results and loop - if required.
    print(
        '[+] {0} addresses successfully marked as code, {1} failed'.format(
            len(marked_success),
            len(marked_failure)
        )
    )

    # ..aaand we're done, unless this pass produced new code.
    if not marked_success:
        break

    # Loop if we successfully marked any new sections as code to ensure that
    # any newly analysed sections are processed.
    loop_count += 1
    print('[-] Recursing to process new code sections')

# Summary and exit.
print(
    '[+] Recursed {0} times with {1} succesfully marked, {2} failed'.format(
        loop_count,
        len(all_success),
        len(all_failure)
    )
)

# Sort the addresses so the report is stable and easy to read.
success_hex = ['0x{0:0x}'.format(addr) for addr in sorted(all_success)]
failure_hex = ['0x{0:0x}'.format(addr) for addr in sorted(all_failure)]
print(
    '[!] Failed addresses: {0}'.format(
        ', '.join(failure_hex)
    )
)
print(
    '[+] Success addresses: {0}'.format(
        ', '.join(success_hex)
    )
)
	#
	# NOTE: Before running, set the minimal string length to 2 characters. This
	# can be done by right-clicking any white space in the IDA Strings
	# window, selecting 'Setup', entering '2' into the 'Minimal string
	# length' field, and clicking 'OK'.
	#

	import time
	import idautils

	# Address range to 'scan' for literal pools. The scan loop in
	# locate_literal_pools() runs while the current address is below
	# rom_scan_end, so the end address itself is not examined.
	rom_scan_start = 0x8000000
	rom_scan_end = 0x807FFFF

	# Cache of strings known to IDA, mapping each string's address (int) to
	# its length. Filled on demand by has_string_entry() and reset at the
	# start of every pass of the main loop.
	string_addrs = {}

	# Potential literal pool locations found by the most recent scan;
	# consulted by should_mark_as_code().
	literal_pools = []


	def has_string_entry(addr):
	    '''
	    Look up an address in the cache of strings known to IDA.

	    The cache is the ``string_addrs`` dict. Whenever it is found empty it
	    is filled from ``idautils.Strings()``, so the string list is only
	    enumerated when nothing is cached yet.

	    Args:
	        addr (int): The address to check.
	    Returns:
	        The length of the string IDA has recorded at that address, or
	        None when no string is recorded there.
	    '''
	    # Fill the cache on demand, keyed by each string's address.
	    if not string_addrs:
	        for entry in idautils.Strings():
	            string_addrs[entry.ea] = entry.length

	    # dict.get() yields None for addresses that are not in the cache.
	    return string_addrs.get(addr)


	def locate_literal_pools(s_addr, e_addr):
	    '''
	    Collect the addresses in a range which look like literal pool entries.

	    The range is walked item by item. An address is reported when it is
	    not code, is flagged as data, unknown or tail, has the cross-reference
	    flag set, and is NOT known to IDA as a string.

	    Args:
	        s_addr (int): The address to start scanning at.
	        e_addr (int): The address to stop scanning at. Scanning ends as
	            soon as the current address reaches this value.

	    Returns:
	        A list of potential literal pool addresses, in ascending order.
	    '''
	    # Bit tested for cross-references (the FF_REF flag).
	    ref_flag = 4096
	    candidates = []

	    ea = s_addr
	    while ea < e_addr:
	        item_flags = GetFlags(ea)
	        step = get_item_size(ea)

	        # Code is never a literal pool; everything else is inspected.
	        if not isCode(item_flags):
	            is_candidate_kind = (
	                isData(item_flags)
	                or isUnknown(item_flags)
	                or isTail(item_flags)
	            )
	            if is_candidate_kind and item_flags & ref_flag == ref_flag:
	                # Referenced non-code which IDA does not list as a string
	                # may be a literal pool entry.
	                if not has_string_entry(ea):
	                    candidates.append(ea)

	        # Always advance by the size of the item at the current address.
	        ea += step

	    return candidates


	def should_mark_as_code(addr):
	    '''
	    Performs some heuristics on an address to determine whether to mark it
	    as code. This is intended for use with ARM binaries which contain
	    literal pools, and mileage may vary.

	    An address is rejected when it is itself a potential literal pool
	    entry, is already code, is known to IDA as a string, or when the text
	    of its operand 1 parses as a hex value of 0x7E or below. An operand
	    which cannot be parsed as a hex number is also rejected rather than
	    raising.

	    Args:
	        addr (int): The address to analyse
	    Results:
	        Whether to mark the section as code or not (Boolean).
	    '''
	    # Skip addresses which are marked as being potential literal pools.
	    if addr in literal_pools:
	        return False

	    # Skip addresses marked as code.
	    if isCode(GetFlags(addr)):
	        return False

	    # Skip addresses marked as strings.
	    if has_string_entry(addr):
	        return False

	    # Skip NULLs and characters in the ASCII range. The operand text may
	    # not parse as a hex number (for example if it is empty); be
	    # conservative and leave such addresses alone instead of aborting the
	    # whole run.
	    try:
	        value = int(print_operand(addr, 1), 16)
	    except (TypeError, ValueError):
	        return False

	    return value > 0x7E


	# Number of passes over the ROM; the first pass is counted up front and
	# the counter only grows when another pass is actually started.
	loop_count = 1
	all_success = set()
	all_failure = set()
	while True:
	    marked_success = []
	    marked_failure = []

	    # Flatten any cached strings each loop, so has_string_entry() rebuilds
	    # its cache from IDA's current string list.
	    string_addrs = {}

	    # Determine potential literal pool locations each loop.
	    print('[-] Attempting to locate literal pools')
	    literal_pools = locate_literal_pools(rom_scan_start, rom_scan_end)

	    print('[-] Attempting to mark sections following literal pools as code')
	    for c_addr in literal_pools:
	        # We always operate on the NEXT address, as we're trying to inspect
	        # the address AFTER the literal pool entry.
	        n_addr = c_addr + get_item_size(c_addr)

	        if not should_mark_as_code(n_addr):
	            continue

	        # Attempt to mark as code and wait for AA to finish.
	        ida_auto.auto_make_code(n_addr)
	        ida_auto.auto_wait()

	        # If the address doesn't now have the FF_CODE flag after AA, then
	        # it wasn't able to be processed as code.
	        if isCode(GetFlags(n_addr)):
	            marked_success.append(n_addr)
	            all_success.add(n_addr)
	            # An address which failed on an earlier pass but succeeds now
	            # must not be reported as a failure in the final summary.
	            all_failure.discard(n_addr)
	        else:
	            marked_failure.append(n_addr)
	            all_failure.add(n_addr)

	    # Print results and loop - if required.
	    print(
	        '[+] {0} addresses successfully marked as code, {1} failed'.format(
	            len(marked_success),
	            len(marked_failure)
	        )
	    )

	    # ..aaand we're done, unless this pass produced new code.
	    if not marked_success:
	        break

	    # Loop if we successfully marked any new sections as code to ensure
	    # that any newly analysed sections are processed.
	    loop_count += 1
	    print('[-] Recursing to process new code sections')

	# Summary and exit.
	print(
	    '[+] Recursed {0} times with {1} succesfully marked, {2} failed'.format(
	        loop_count,
	        len(all_success),
	        len(all_failure)
	    )
	)

	# Sort the addresses so the report is stable and easy to read.
	success_hex = ['0x{0:0x}'.format(addr) for addr in sorted(all_success)]
	failure_hex = ['0x{0:0x}'.format(addr) for addr in sorted(all_failure)]
	print(
	    '[!] Failed addresses: {0}'.format(
	        ', '.join(failure_hex)
	    )
	)
	print(
	    '[+] Success addresses: {0}'.format(
	        ', '.join(success_hex)
	    )
	)