Created
March 3, 2024 12:41
-
-
Save schwartz1375/ab819e8325c57ead85c0a9fd29ff76b8 to your computer and use it in GitHub Desktop.
peComplexityAnalyzer.py is a Python script designed to estimate the cyclomatic complexity of PE (Portable Executable) files, commonly used in Windows environments.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
__author__ = 'Matthew Schwartz' | |
''' | |
This script utilizes pefile for parsing PE file structures and capstone for disassembling the binary code. The primary focus is on accurately extracting and analyzing the assembly code from executable sections of the PE file to calculate cyclomatic complexity, a metric that provides insight into the code's complexity and potential maintainability issues. | |
''' | |
import sys | |
import pefile | |
import capstone | |
def disassemble_pe(input_file):
    """Disassemble every executable section of a PE file.

    The file is parsed with pefile and decoded with capstone in 32-bit or
    64-bit x86 mode, chosen from the machine type in the PE file header.

    Args:
        input_file: Path of the PE file to analyse.

    Returns:
        A list of strings, one per decoded instruction, formatted as
        "<mnemonic> <operands>". An empty list is returned when the file
        cannot be parsed, the architecture is unsupported, or no
        instructions could be decoded; in the error cases a message is
        printed as well.
    """
    pe = None
    try:
        pe = pefile.PE(input_file)

        machine = pe.FILE_HEADER.Machine
        if machine == pefile.MACHINE_TYPE['IMAGE_FILE_MACHINE_I386']:
            mode = capstone.CS_MODE_32
        elif machine == pefile.MACHINE_TYPE['IMAGE_FILE_MACHINE_AMD64']:
            mode = capstone.CS_MODE_64
        else:
            raise ValueError("Unsupported architecture")

        md = capstone.Cs(capstone.CS_ARCH_X86, mode)
        # Without SKIPDATA capstone stops at the first byte sequence it
        # cannot decode, so any data embedded in a code section (jump
        # tables, padding) would silently drop the rest of that section.
        md.skipdata = True

        instructions = []
        for section in pe.sections:
            if not section.IMAGE_SCN_MEM_EXECUTE:
                continue
            # Addresses are section-relative virtual addresses (no image
            # base added); they only influence how operands are rendered.
            base_addr = section.VirtualAddress
            code = section.get_data()
            for insn in md.disasm(bytes(code), base_addr):
                # SKIPDATA placeholders carry instruction id 0 (invalid);
                # they are not real instructions, so leave them out.
                if insn.id == 0:
                    continue
                instructions.append(f"{insn.mnemonic} {insn.op_str}")
        return instructions
    except Exception as e:
        # Best-effort tool: report the problem and let the caller handle
        # the empty result instead of crashing.
        print(f"Error during disassembly: {e}")
        return []
    finally:
        # Release the file mapping held by pefile.
        if pe is not None:
            pe.close()
def estimate_cyclomatic_complexity(assembly_instructions):
    """Estimate cyclomatic complexity from a flat instruction listing.

    Every x86 conditional jump is treated as one decision point; the
    estimate is the number of decision points plus one for the initial
    path.

    Args:
        assembly_instructions: Iterable of strings whose first
            whitespace-separated token is the instruction mnemonic.
            Empty or whitespace-only entries are ignored.

    Returns:
        The estimated complexity as an int (1 for an empty listing).
    """
    conditional_jumps = frozenset((
        'ja', 'jae', 'jb', 'jbe', 'jc', 'jcxz', 'jecxz',
        'jrcxz', 'je', 'jg', 'jge', 'jl', 'jle', 'jna',
        'jnae', 'jnb', 'jnbe', 'jnc', 'jne', 'jng', 'jnge',
        'jnl', 'jnle', 'jno', 'jnp', 'jns', 'jnz', 'jo',
        'jp', 'jpe', 'jpo', 'js', 'jz',
    ))
    count = 0
    for instruction in assembly_instructions:
        # Only the mnemonic matters; maxsplit=1 avoids splitting operands.
        tokens = instruction.split(None, 1)
        # A blank entry has no tokens; skip it rather than index into it.
        if tokens and tokens[0] in conditional_jumps:
            count += 1
    return count + 1  # Adding 1 to account for the initial path
def main():
    """Command-line entry point.

    Expects exactly one argument, the path of the binary to analyse.
    Prints a usage line and exits with status 1 otherwise. On success the
    estimated cyclomatic complexity is printed; if nothing could be
    disassembled a notice is printed instead.
    """
    arguments = sys.argv
    if len(arguments) != 2:
        print("Usage: python script.py <binary_file>")
        sys.exit(1)

    target_path = arguments[1]
    listing = disassemble_pe(target_path)

    # An empty listing means the disassembler produced nothing.
    if not listing:
        print("No executable sections found or unable to disassemble.")
        return

    score = estimate_cyclomatic_complexity(listing)
    print("Cyclomatic Complexity:", score)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment