Skip to content

Instantly share code, notes, and snippets.

@YSaxon
Last active May 17, 2024 16:55
Show Gist options
  • Save YSaxon/46fd50e5f09c264ae17f04eafb704376 to your computer and use it in GitHub Desktop.
Save YSaxon/46fd50e5f09c264ae17f04eafb704376 to your computer and use it in GitHub Desktop.

Analyzing Cortex-M Firmware Binary Files with Ghidra

Opening the Firmware Binary

  1. Open the firmware binary in Ghidra

Establishing the Initial Memory Offset

  1. Go to the data view and copy all the string addresses, and paste them into a Jupyter notebook with the code below
  2. Similarly, sort and copy the values of all the possible pointers (probably data of type undefined4) — the values stored there, not the addresses where they are stored
  • Use the following Python code to process the data:

    import pyperclip
    import matplotlib.pyplot as plt
    
    def process_list_to_hex(listraw):
        """Convert pasted hexadecimal address strings into integers.

        Args:
            listraw: Iterable of hexadecimal strings, one address per entry
                (for example ``pyperclip.paste().splitlines()``). Surrounding
                whitespace and trailing ``h`` suffixes are ignored; blank
                entries are skipped.

        Returns:
            A list of ints in input order.

        Raises:
            ValueError: If a non-blank entry is not valid hexadecimal.
        """
        # Start from an empty list: seeding the result with the raw clipboard
        # lines would mix unparsed strings into the returned integers.
        listhex = []
        for raw in listraw:
            text = raw.strip().rstrip('h')
            if not text:
                # Clipboard pastes commonly contain empty lines.
                continue
            listhex.append(int(text, 16))
        # Short preview so a bad paste is noticed immediately.
        print(listhex[:5])
        return listhex
    
    def find_best_offset(list1, list2):
        """Rank the differences between two address lists by frequency.

        For every address in ``list1`` and every distinct address in
        ``list2``, the difference ``addr2 - addr1`` is tallied.

        Args:
            list1: Iterable of integer addresses (subtrahends).
            list2: Iterable of integer addresses (minuends); duplicates are
                collapsed before counting.

        Returns:
            Up to 20 ``(offset, count)`` tuples, highest count first.
        """
        targets = set(list2)
        tally = {}
        for base in list1:
            for target in targets:
                delta = target - base
                if delta in tally:
                    tally[delta] += 1
                else:
                    tally[delta] = 1
        ranked = list(tally.items())
        # Stable sort: offsets with equal counts keep first-seen order.
        ranked.sort(key=lambda pair: pair[1], reverse=True)
        return ranked[:20]
    
    def visualize_offsets(offset_counts):
        """Show a bar chart of match counts per candidate offset.

        Args:
            offset_counts: Sequence of ``(offset, count)`` pairs, such as the
                list returned by ``find_best_offset``.
        """
        offsets = [offset for offset, _ in offset_counts]
        counts = [count for _, count in offset_counts]
        # Candidate offsets can lie very far apart numerically; drawing each
        # bar at its numeric x position makes default-width bars too thin to
        # see. Use evenly spaced slots labelled with the hex offset instead.
        positions = list(range(len(offsets)))
        labels = [hex(offset) for offset in offsets]
        plt.figure(figsize=(12, 6))
        plt.bar(positions, counts)
        plt.xlabel('Offset')
        plt.ylabel('Number of Matches')
        plt.title('Best Offsets by Number of Matches')
        plt.xticks(positions, labels, rotation=45)
        plt.grid(True)
        plt.show()
  3. Identify the offset that has significantly more matches than the others. Go to the Memory Map and move the memory block's start address by that offset (for an image loaded at address 0, the new start address is simply the offset). Now you should see many strings whose references align.

Automating the entire step

from ghidra.program.model.listing import CodeUnit
from ghidra.program.model.data import DataType
from ghidra.program.model.mem import MemoryBlock
from ghidra.program.model.symbol import SymbolType
from ghidra.util.task import TaskMonitor

# Program currently open in Ghidra, and its listing; the listing is what the
# script iterates to enumerate defined data items.
program = getCurrentProgram()
listing = program.getListing()

# Function to get all undefined4 and string addresses
def get_all_undefined4_and_strings():
    """Collect pointer-candidate values and string locations from the listing.

    Walks every defined data item in the module-level ``listing``. Items whose
    type is named ``undefined4`` or whose type name contains ``pointer``
    contribute their stored *value*; items whose type name contains ``string``
    (case-insensitive) contribute their *start address*.

    Returns:
        A tuple ``(undefined4_addresses, string_addresses)`` of two lists.
    """
    undefined4_addresses = []
    string_addresses = []
    dataIterator = listing.getDefinedData(True)

    while dataIterator.hasNext():
        data = dataIterator.next()
        typeName = data.getDataType().getDisplayName()
        if typeName == "undefined4" or "pointer" in typeName:
            value = data.getValue()
            # getValue() can return None when the item has no value; such
            # entries cannot be parsed as addresses later, so skip them.
            if value is not None:
                undefined4_addresses.append(value)
        elif "string" in typeName.lower():
            string_addresses.append(data.getMinAddress())

    return undefined4_addresses, string_addresses

# Function to process list to hex
def process_list_to_hex(listraw):
    """Parse the string form of each item as a hexadecimal integer.

    Args:
        listraw: Iterable of objects whose ``str()`` is a hexadecimal number,
            optionally with a ``0x`` prefix, a trailing ``L`` and/or an
            address-space prefix such as ``ram:``. ``None`` entries are
            skipped.

    Returns:
        A list of ints in input order.

    Raises:
        ValueError: If an entry's text is not valid hexadecimal.
    """
    listhex = []
    for item in listraw:
        if item is None:
            # Nothing to parse for items that carry no value.
            continue
        text = str(item).strip().rstrip('L')
        # Addresses may be rendered as "space:offset" or "space::offset";
        # only the part after the last colon is the numeric offset.
        text = text.rsplit(':', 1)[-1]
        if text[:2].lower() == '0x':
            text = text[2:]
        # An empty remainder (e.g. from a bare "0x") is treated as zero.
        listhex.append(int(text or '0', 16))
    return listhex

# Function to find best offset
def find_best_offset(list1, list2):
    """Tally every ``addr2 - addr1`` difference and return the most frequent.

    ``list2`` is de-duplicated first; each element of ``list1`` is then paired
    with every distinct element of ``list2``.

    Returns:
        At most 20 ``(offset, count)`` tuples ordered by descending count.
    """
    distinct_targets = set(list2)
    hits = {}
    for source in list1:
        for target in distinct_targets:
            diff = target - source
            try:
                hits[diff] += 1
            except KeyError:
                hits[diff] = 1
    ordered = list(hits.items())
    ordered.sort(key=lambda entry: entry[1], reverse=True)
    return ordered[:20]

# Main script execution
pointer_addresses, string_addresses = get_all_undefined4_and_strings()

hex_string_addresses = process_list_to_hex(string_addresses)
hex_pointer_addresses = process_list_to_hex(pointer_addresses)

best_offsets = find_best_offset(hex_string_addresses, hex_pointer_addresses)
print(best_offsets[:5])

# Decide whether the top candidate clearly stands out: its lead over the
# runner-up must be more than 4x the gap between the runner-up and third place.
top_counts = [count for _, count in best_offsets[:3]]
clear_winner = False
if len(top_counts) > 1:
    if len(top_counts) == 2:
        # Only two candidates: compare against an absent third with 0 matches.
        top_counts.append(0)
    lead = top_counts[0] - top_counts[1]
    next_gap = top_counts[1] - top_counts[2]
    # Multiplication instead of division avoids ZeroDivisionError when the
    # second and third counts tie; a tie is treated as a gap of 1.
    clear_winner = lead > 4 * max(next_gap, 1)

# Print the best offset
if clear_winner:
    best_offset = best_offsets[0][0]
    # rstrip('L') drops the long-integer suffix that Python 2 appends.
    best_offset_hex = hex(best_offset).rstrip('L')
    # Single-string prints render the same under Python 2 and Python 3.
    print("Best offset much better than any others: " + best_offset_hex)
    memory = currentProgram.getMemory()
    minAddress = memory.getMinAddress()
    # Use the numeric offset rather than parsing str(minAddress), which may
    # include an address-space prefix.
    new_start_hex = hex(minAddress.getOffset() + best_offset).rstrip('L')
    print("Assuming you only have one memory block, you'll want to move its starting point by " + best_offset_hex + " to " + new_start_hex)
    # Optional automatic move, left disabled:
    # if len(memory.getBlocks()) == 1:
    #     memory.moveBlock(memory.getBlocks()[0], memory.getBlocks()[0].getStart().add(best_offset), TaskMonitor.DUMMY)
else:
    print("No offset clearly stands out; inspect the candidates printed above.")

Identifying Key Functions

  1. Find the top-level function that contains code similar to the following:
/* puVar4 is the end bound, puVar3 the destination base and puVar2 the source
   base of the copy loop below. */
puVar4 = pointer_to_20001730;
puVar3 = pointer_to_20000000;
puVar2 = pointer_to_10048a30;
bVar1 = (bool)isCurrentModePrivileged();
/* The main stack pointer is set only when running in privileged mode. */
if (bVar1) {
    setMainStackPointer(pointer_to_2000fff0);
}
/* Copy one 4-byte word per iteration from puVar2 + iVar6 to puVar3 + iVar6
   until puVar3 + iVar6 reaches puVar4. */
for (iVar6 = 0; puVar5 = pointer_to_2000b358, puVar7 = (undefined4 *)pointer_to_20001730,
     (int)(puVar3 + iVar6) < (int)puVar4; iVar6 = iVar6 + 4) {
    *(undefined4 *)(puVar3 + iVar6) = *(undefined4 *)(puVar2 + iVar6);
}
  2. Rename the pointers based on the addresses they point to (keep in mind that Ghidra names them after the address where the pointer is stored, not the address it points to). Determine which part of ROM is being copied to RAM (the 0x200... addresses). In the example, 0x1730 bytes are copied from 0x10048a30 to 0x20000000.

Mapping Memory Regions

  1. Go to the Memory Map again and add a section:
  • Start address: 0x20000000 (substitute with your actual value)
  • Length: 0x1730 (substitute with your actual value)
  • Block Type: Byte Mapped
  • Byte Mapped Start Address: 0x10048a30 (substitute with your actual value)
  2. Add another memory block for the remaining 0x200... RAM segment:
  • Start address: previous block's end address + 1
  • End address: possibly the address passed to setMainStackPointer (0x2000fff0 in the example; needs further investigation)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment