Skip to content

Instantly share code, notes, and snippets.

@s-ff
Last active March 15, 2024 13:41
Show Gist options
  • Save s-ff/792f31e5f5d92458a60727464b93180e to your computer and use it in GitHub Desktop.
Save s-ff/792f31e5f5d92458a60727464b93180e to your computer and use it in GitHub Desktop.
Just a POC on an idea
#Extract instruction-level number features with P-code
#@category PCode
#@author Soufiane Fariss
#@menupath
#@toolbar
from ghidra.program.model.pcode import HighParam, PcodeOp, PcodeOpAST
from ghidra.program.model.address import AddressSet
from capa.features.extractors.ghidra.insn import *
from capa.features.address import AbsoluteVirtualAddress
def extract_insn_number_features_via_pcode(insn):
    """
    Print the constant operands found in the P-code of one instruction.

    Walks the P-code ops returned by ``insn.getPcode()`` and reports:

    * COPY ops whose input 0 is a constant;
    * INT_ADD ops with a constant input, probing input 1 first and only
      falling back to input 0 when input 1 is not constant (so at most one
      line is printed per op).

    Nothing is returned; every hit is written to stdout.
    """
    # Only COPY and INT_ADD are inspected so far. Other PcodeOps such as LOAD
    # or INT_SUB can also reference constant values and are not covered yet.
    # NOTE: constants are not filtered against program memory; a
    # currentProgram().getMemory().contains(toAddr(value)) check was tried
    # for the COPY case and left disabled.
    line_format = "absolute(0x{}):\t\t {}\t\t\tin [{}] \t\t @ {}"

    # Input slots to probe, in priority order, for each handled opcode.
    slots_by_opcode = {
        PcodeOp.COPY: (0,),
        PcodeOp.INT_ADD: (1, 0),
    }

    for pcode_op in insn.getPcode():
        for slot in slots_by_opcode.get(pcode_op.getOpcode(), ()):
            varnode = pcode_op.getInput(slot)
            if not varnode.isConstant():
                continue

            constant = varnode.getOffset()
            address = insn.getAddress()
            # The first two characters of the address string are dropped
            # before it is placed after the literal "0x" prefix.
            print(line_format.format(address.toString()[2:], hex(constant), pcode_op.toString(), address))
            # First constant slot wins; do not report a second one.
            break
# Get all instructions in the .text section.
# The block is looked up once and checked, because getMemoryBlock() yields
# None when the program has no block with that name.
text_block = getMemoryBlock('.text')
if text_block is None:
    print("no .text memory block found; nothing to extract")
else:
    # Named text_range (not `range`) so the builtin is not shadowed.
    text_range = AddressSet(text_block.getStart(), text_block.getEnd())
    # Listing.getInstructions(AddressSetView, boolean) takes an explicit
    # direction flag; True iterates forward through the set.
    instructions = currentProgram().getListing().getInstructions(text_range, True)

    # Iterate over each instruction and extract features using the two methods
    while instructions.hasNext():
        insn = instructions.next()

        # Method 1: via P-code
        extract_insn_number_features_via_pcode(insn)

        # Method 2: via parsing machine code instructions (from capa)
        insn2 = InsnHandle(address=AbsoluteVirtualAddress(insn.getAddress().getOffset()), inner=insn)
        # The two leading 0 arguments are placeholders; presumably capa's
        # function and basic-block handles -- TODO confirm against capa.
        for feature in extract_insn_number_features(0, 0, insn2):
            print(insn2.address, feature)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment