ida processor module script for chip-8
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# ---------------------------------------------------------------------- | |
# Processor module template script | |
# (c) Hex-Rays | |
import sys | |
from ida_bytes import * | |
from ida_ua import * | |
from ida_idp import * | |
from ida_auto import * | |
from ida_nalt import * | |
import ida_frame | |
from ida_funcs import * | |
from ida_lines import * | |
from ida_problems import * | |
import ida_offset | |
from ida_segment import * | |
from ida_name import * | |
from ida_netnode import * | |
import idaapi | |
# ---------------------------------------------------------------------- | |
class chip8_processor_t(idaapi.processor_t): | |
""" | |
Processor module classes must derive from idaapi.processor_t | |
The required and optional attributes/callbacks are illustrated in this template | |
""" | |
# Processor module id (values above 0x8000 are reserved for third-party modules)
id = 0x8000 + 1
# Processor feature bits
flag = (
    PR_ASSEMBLE
    | PR_SEGS
    | PR_DEFSEG32
    | PR_USE32
    | PRN_HEX
    | PR_RNAMESOK
    | PR_NO_SEGMOVE
)
# Bits per byte in code segments (usually 8; IDA supports up to 32)
cnbits = 8
# Bits per byte in non-code segments (usually 8; IDA supports up to 32)
dnbits = 8
# Short processor names; each one must be shorter than 9 characters
psnames = ['chip8']
# Long processor names; no restriction on their length
plnames = ['Chip-8']
PTRSZ = 2
# The template's optional `real_width` table (digits after the decimal point
# for short float / float / double / long double, e.g. { 0,7,15,19 } on the
# IBM PC module) and its optional return-instruction icode are intentionally
# not defined in this section.
# Only one assembler is supported.
assembler = {
    # --- general ------------------------------------------------------
    # assembler feature flags
    'flag' : ASH_HEXF3 | AS_UNEQU | AS_COLON | ASB_BINF4 | AS_N2CHR,
    # user defined flags, local to this module (optional)
    'uflag' : 0,
    # name shown in menus
    'name': "My processor module bytecode assembler",
    # header lines generated at the start of the listing (optional)
    'header': ["Line1", "Line2"],
    # org / end directives
    'origin': "org",
    'end': "end",
    # comment opener (the closer is 'cmnt2')
    'cmnt': ";",
    # string and character constant delimiters
    'ascsep': "\"",
    'accsep': "'",
    # characters that cannot appear inside character/string constants
    'esccodes': "\"'",

    # --- data directives ---------------------------------------------
    'a_ascii': "db",        # string
    'a_byte': "db",         # byte
    'a_word': "dw",         # word
    'a_dword': "dd",        # dword (remove if not allowed)
    'a_qword': "dq",        # qword (remove if not allowed)
    'a_oword': "xmmword",   # oword (remove if not allowed)
    'a_yword': "ymmword",   # yword (remove if not allowed)
    'a_float': "dd",        # 4-byte float (remove if not allowed)
    'a_double': "dq",       # 8-byte double
    'a_tbyte': "dt",        # long double
    'a_packreal': "",       # packed decimal real (optional)
    # array template: #h header, #d size, #v value,
    # #s(b,w,l,q,f,d,o) size specifiers
    'a_dups': "#d dup(#v)",
    # uninitialized data; '%s' receives the size
    'a_bss': "%s dup ?",
    # 'equ' directive, used because AS_UNEQU is set (optional)
    'a_equ': ".equ",
    # 'seg' prefix (example: push seg seg001)
    'a_seg': "seg",
    # symbol for the current instruction pointer
    'a_curip': "$",

    # --- name visibility keywords ------------------------------------
    'a_public': "public",
    'a_weak': "weak",
    'a_extrn': "extrn",
    'a_comdef': "",         # communal variable
    'a_align': "align",

    # --- expression syntax -------------------------------------------
    'lbrace': "(",
    'rbrace': ")",
    'a_mod': "%",
    'a_band': "&",
    'a_bor': "|",
    'a_xor': "^",
    'a_bnot': "~",
    'a_shl': "<<",
    'a_shr': ">>",
    # size-of-type format string (optional)
    'a_sizeof_fmt': "size %s",
    'flag2': 0,
    # comment closer, e.g. "*)" when cmnt is "(*" (optional)
    'cmnt2': "",
    # low/high byte and word operators; would contain %s for the operand (optional)
    'low8': "",
    'high8': "",
    'low16': "",
    'high16': "",
    # include directive format string (optional)
    'a_include_fmt': "include %s",
    # format for structure names shown in verbose (multiline) form (optional)
    'a_vstruc_fmt': "",
    # 'rva' keyword for image based offsets (optional; see nalt.hpp, REFINFO_RVA)
    'a_rva': "rva"
}  # assembler
# Operand specval bit: the o_displ operand stands for the pressed key
FL_K = 0x1  # KEY_PRESSED
# ---------------------------------------------------------------------- | |
# The following callbacks are optional. | |
# *** Please remove the callbacks that you don't plan to implement *** | |
def notify_out_header(self, ctx):
    """Produce the start of the disassembled text (this module emits nothing)."""
    return None
def notify_out_footer(self, ctx):
    """Produce the end of the disassembled text (this module emits nothing)."""
    return None
def notify_out_segstart(self, ctx, ea):
    """Produce the start-of-segment lines for the segment at `ea` (no-op here)."""
    return None
def notify_out_segend(self, ctx, ea):
    """Produce the end-of-segment lines for the segment at `ea` (no-op here)."""
    return None
def notify_out_assumes(self, ctx):
    """Produce 'assume' directives (this module emits none)."""
    return None
def notify_term(self):
    """Called while the processor module is unloading; nothing to clean up."""
    return None
def notify_setup_til(self):
    """
    Set up default type libraries after a new file is loaded into the database.

    A module may load tils or configure the memory model here; this one does nothing.
    @return: None
    """
    return None
def notify_newprc(self, nproc, keep_cfg):
    """
    Called before the processor type changes.

    nproc - index into the array of processor names
    Returns >=0 to allow the change, <0 to prohibit it; always allows.
    """
    return 0
def notify_newfile(self, filename):
    """Called after a new file has been loaded; no action taken."""
    return None
def notify_oldfile(self, filename):
    """Called after an existing database has been loaded; no action taken."""
    return None
def notify_newbinary(self, filename, fileoff, basepara, binoff, nbytes):
    """
    Called before a binary file is loaded; no action taken.

    args:
      filename - binary file name
      fileoff  - offset in the file
      basepara - base loading paragraph
      binoff   - loader offset
      nbytes   - number of bytes to load
    Returns nothing
    """
    return None
def notify_undefine(self, ea):
    """
    An instruction or data item at `ea` is being deleted.

    @return: >=0 - ok, <0 - the kernel should stop.
             For non-negative values: bit0 is ignored,
             bit1 means "do not delete srareas at the item end".
    This implementation always answers 1 (ok, no extra bits of meaning).
    """
    return 1
def notify_endbinary(self, ok):
    """
    Called after a binary file has been loaded; no action taken.

    args:
      ok - whether the file loaded successfully
    """
    return None
def notify_assemble(self, ea, cs, ip, use32, line):
    """
    Assemble one instruction (requires PR_ASSEMBLE in the processor flags).

    args:
      ea    - linear address of the instruction
      cs    - cs of the instruction
      ip    - ip of the instruction
      use32 - is this a 32-bit segment?
      line  - text to assemble
    The callback is expected to return the opcode string; this stub is not
    implemented and returns None.
    """
    return None
def notify_savebase(self):
    """The database is being saved; this module keeps no local data to store."""
    return None
def notify_out_data(self, ctx, analyze_only):
    """
    Generate the text representation of data items.

    Such a callback MAY change the database and create cross-references;
    this stub does nothing.
    """
    return None
def notify_cmp_opnd(self, op1, op2):
    """
    Compare two instruction operands.

    Contract: 1 - equal, 0 - not equal. This stub always reports "not equal".
    """
    return False
def notify_can_have_type(self, op):
    """
    May the operand be given a type (offset, segment, decimal, ...)?

    For example a register operand normally cannot be retyped by the user.
    Returns: bool - this implementation allows it for every operand.
    """
    return True
def translate(self, base, offset):
    """
    Translation function for offsets.

    Used by the offset display functions to calculate the referenced address.
    This module performs no translation and always answers BADADDR.

    Returns: ea_t
    """
    # Qualified through `idaapi` (imported at file level), like
    # notify_calc_step_over does: none of the visible star-imports is known
    # to provide a bare BADADDR name.
    return idaapi.BADADDR
def notify_set_idp_options(self, keyword, type, value):
    """
    Set a processor-specific option.

    args:
      keyword - option name, or an empty string (then `type` is 0, see below)
      type    - one of
                  IDPOPT_STR  string constant
                  IDPOPT_NUM  number
                  IDPOPT_BIT  zero/one
                  IDPOPT_I64  64bit number
                  0 -> a configuration dialog should be displayed
      value   - the actual value
    Returns:
      IDPOPT_OK        ok
      IDPOPT_BADKEY    illegal keyword
      IDPOPT_BADTYPE   illegal type of value
      IDPOPT_BADVALUE  illegal value (bad range, for example)
      or a string containing the error messages.
    This implementation accepts everything and returns IDPOPT_OK.
    """
    return idaapi.IDPOPT_OK
def notify_gen_map_file(self, qfile):
    """
    Generate the map file (when this callback is absent the kernel does it).

    Writes two placeholder lines to `qfile`.
    Returns the number of lines in the output file
    (0 - empty file, -1 - write error); here always 2.
    """
    for text in ("Line 1\n", "Line 2\n!"):
        qfile.write(text)
    return 2  # two lines
def notify_create_func_frame(self, func_ea):
    """
    Create a frame for a newly created function (size, attributes, ...).

    This module creates no frames and returns False.
    """
    return False
def notify_is_far_jump(self, icode):
    """
    Is `icode` an indirect far jump or call?

    Only meaningful for processors with 'near' and 'far' reference types;
    always False here.
    """
    return False
def notify_is_align_insn(self, ea):
    """
    Was the instruction at `ea` created only for alignment purposes?

    Returns: number of bytes in the instruction; always 0 here.
    """
    return 0
def notify_out_special_item(self, ctx, segtype):
    """
    Generate text for an item in a special segment
    (absolute symbols, externs, communal definitions, ...).

    Returns: 1 - overflow, 0 - ok; always 0 here.
    """
    return 0
def notify_get_frame_retsize(self, func_ea):
    """
    Size in bytes of a function's return address.

    Without this callback the kernel assumes 4 bytes for 32-bit functions
    and 2 bytes otherwise; this module always reports 2.
    """
    return 2
def notify_is_switch(self, swi, some):
    """
    Look for a 'switch' idiom and fill the switch-info structure.

    @return: Boolean (True if a switch was found); always False here.
    """
    return False
def notify_is_sp_based(self, op):
    """
    Is the operand relative to the stack pointer or to the frame pointer?

    Used to decide how a stack variable is printed. When the callback is
    absent every operand is treated as SP based, so it only needs defining
    when some stack references go through a frame pointer.
    Possible flags:
      OP_FP_BASED  operand is FP based
      OP_SP_BASED  operand is SP based
      OP_SP_ADD    operand value is added to the pointer
      OP_SP_SUB    operand value is subtracted from the pointer
    This implementation always answers OP_FP_BASED.
    """
    return idaapi.OP_FP_BASED
def notify_add_func(self, func_ea):
    """
    The kernel has added a function; no action taken.

    @param func_ea: function start EA
    @return: Nothing
    """
    return None
def notify_del_func(self, func_ea):
    """
    The kernel is about to delete a function.

    @param func_ea: function start EA
    @return: 0 - ok, <0 - do not delete; deletion is always allowed here.
    """
    return 0
def notify_get_autocmt(self, insn):
    """
    Get the automatic comment for an instruction.

    Looks up the instruction description `self.instruc[insn.itype]` and uses
    its optional 'cmt' entry. The entry may be either a callable taking the
    instruction (it is called and its result returned) or a ready-made value
    such as a plain string (returned as is). The instruction definitions in
    this file carry plain-string comments, which the previous unconditional
    call would have failed on with a TypeError.

    @param insn: the instruction in question
    @return: None or the comment string
    """
    entry = self.instruc[insn.itype]
    if 'cmt' not in entry:
        return None
    cmt = entry['cmt']
    # Only invoke the entry when it really is a comment generator.
    return cmt(insn) if callable(cmt) else cmt
def notify_create_switch_xrefs(self, jumpea, swi):
    """
    Create xrefs for a custom jump table; no action taken.

    @param jumpea: address of the jump insn
    @param swi: switch information
    @return: None
    """
    return None
def notify_calc_step_over(self, ip):
    """
    Compute the address of the instruction executed after a "step over",
    where the kernel would place a breakpoint.

    BADADDR is the answer when step-over equals step-into or the address
    cannot be computed.
    args:
      ip - instruction address
    returns: target or BADADDR; this implementation always returns BADADDR.
    """
    return idaapi.BADADDR
def notify_may_be_func(self, insn, state):
    """
    Can a function start at the instruction `insn`?

    arg: state -- autoanalysis phase
         state == 0: creating functions
               == 1: creating chunks
    returns: probability 0..100; always 0 here.
    """
    return 0
def notify_str2reg(self, regname):
    """
    Convert a register name to a register number.

    The register number is the position of `regname` in `self.reg_names`.
    Most processor modules do not need this callback; it is useful only when
    the register name table alone does not give the correct names.

    args: regname
    Returns: register number, or -1 when the name is not in the table
    """
    names = self.reg_names
    return names.index(regname) if regname in names else -1
def notify_is_sane_insn(self, insn, no_crefs):
    """
    Is the instruction in `insn` sane for the current file type?

    args: no_crefs
      1: no code refs lead to the instruction; ida is merely trying to turn
         unexplored bytes into an instruction without any other reason
      0: the instruction is created because of a coderef, a user request or
         another weighty reason
    returns: >=0 - ok, <0 - no, the instruction isn't likely to appear in
             the program

    NOTE(review): this always answers -1, i.e. "not sane", for every
    instruction and both values of `no_crefs` - confirm that is intended.
    """
    return -1
def notify_func_bounds(self, code, func_ea, max_func_end_ea):
    """
    find_func_bounds() has finished; the module may fine tune the bounds.

    args:
      code            - one of FIND_FUNC_XXX (see find_func_bounds)
      func_ea         - function start ea
      max_func_end_ea - maximal end, from the kernel's point of view
    returns: possible_return_code; always FIND_FUNC_OK here.
    """
    return FIND_FUNC_OK
def asm_out_func_header(self, ctx, func_ea):
    """Generate the function header lines (this module emits none)."""
    return None
def asm_out_func_footer(self, ctx, func_ea):
    """Generate the function footer lines (this module emits none)."""
    return None
def asm_get_type_name(self, flag, ea_or_id):
    """
    Name of the type of the item at ea or id
    (one of: byte, word, dword, near, far, ...).

    Word items yield "word"; everything else, code included, yields "".
    """
    if is_code(flag):
        # The function is looked up, but no name is derived from it yet.
        func = get_func(ea_or_id)
        return ""
    if is_word(flag):
        return "word"
    return ""
def notify_init(self, idp_file):
    """
    Initialize the processor module.

    Switches the database to big-endian byte order.

    args:
      idp_file - passed in by the kernel, unused here
    Returns: >=0 on success; always 0 here.
    """
    # `ida_ida` is not among the file-level imports, so bring it into scope
    # here; without it the call below fails with a NameError.
    import ida_ida

    ida_ida.cvar.inf.set_be(True)
    return 0
def notify_out_label(self, ctx, label):
    """
    The kernel is about to generate an instruction label line or a
    function header.

    args:
      ctx   - output context
      label - label to output
    A return value <0 tells the kernel not to generate the label;
    this implementation returns 0.
    """
    return 0
def notify_rename(self, ea, new_name):
    """
    The kernel is about to rename a byte.

    args:
      ea       - address being renamed
      new_name - the name it is about to receive
    A return value <0 tells the kernel not to rename; this returns 0.
    """
    return 0
def notify_may_show_sreg(self, ea):
    """
    The kernel wants to display the segment registers in the messages window.

    args:
      ea
    A return value <0 stops the kernel from showing them (on the assumption
    that the module did it itself); this implementation returns 0.
    """
    return 0
def notify_coagulate(self, start_ea):
    """
    Try to define some unexplored bytes.

    Called when the kernel has tried everything and found nothing more
    useful than converting to an array of bytes; the module may convert the
    bytes into something better.
    args:
      start_ea - address of the unexplored bytes
    returns: number of converted bytes; always 0 here.
    """
    return 0
def notify_closebase(self):
    """
    The database is about to be closed; no action taken.
    """
    return None
def notify_load_idasgn(self, short_sig_name):
    """
    A FLIRT signature has been loaded for normal processing
    (not for recognition of startup sequences); no action taken.

    args:
      short_sig_name
    """
    return None
def notify_auto_empty(self):
    """
    Info: all analysis queues are empty.

    Called once when the initial analysis is finished; if the queue is not
    empty when this callback returns, it will be called again later.
    No action taken here.
    """
    return None
def notify_is_call_insn(self, insn):
    """
    Is the instruction a "call"?

    args
      insn - instruction
    returns: 0 - unknown, <0 - no, 1 - yes; always "unknown" here.
    """
    return 0
def notify_is_ret_insn(self, insn, strict):
    """
    Is the instruction a "return"?

    insn   - instruction
    strict - 1: report only ret instructions
             0: also include instructions like "leave" that begin the
                function epilog
    returns: 0 - unknown, <0 - no, 1 - yes; always "unknown" here.
    """
    return 0
def notify_kernel_config_loaded(self):
    """
    Called once ida.cfg has been parsed; no action taken.
    """
    return None
def notify_is_alloca_probe(self, ea):
    """
    Does the function at `ea` behave as __alloca_probe?

    args:
      ea
    returns: 1 - yes, 0 - false; always 0 here.
    """
    return 0
def notify_gen_src_file_lnnum(self, ctx, filename, lnnum):
    """
    Callback: generate the analog of a
       #line "file.c" 123
    directive.

    args:
      ctx      - output context
      filename - source file (may be NULL)
      lnnum    - line number
    returns: 1 - directive has been generated; this stub returns 0.
    """
    return 0
def notify_is_insn_table_jump(self, insn):
    """
    Callback: is the instruction a table jump or call?

    Meant for processors where the CF_JUMP bit cannot describe every kind of
    table jump; it is called for instructions that have CF_JUMP set.
    input: insn structure contains the current instruction
    returns: 0 - yes, <0 - no; always "no" (-1) here.
    """
    return -1
def notify_auto_empty_finally(self):
    """
    Info: all analysis queues are definitively empty; no action taken.
    """
    return None
def notify_is_indirect_jump(self, insn):
    """
    Callback: is the instruction an indirect jump?

    Meant for processors where the CF_JUMP bit cannot describe all jump types.
    input: insn structure contains the current instruction
    returns: 0 - use CF_JUMP, 1 - no, 2 - yes; always 0 here.
    """
    return 0
def notify_determined_main(self, main_ea):
    """
    The main() function has been determined; no action taken.
    """
    return None
def notify_validate_flirt_func(self, ea, funcname):
    """
    FLIRT has recognized a library function.

    A plugin or processor module can intercept the recognition here and
    validate the function.
    args:
      ea       - function start address
      funcname - recognized name
    returns: -1 - do not create a function,
              0 - function is validated (always the answer here)
    """
    return 0
def notify_set_proc_options(self, options, confidence):
    """
    Called when the user gave an option string on the command line:
      -p<processor name>:<options>
    which can be used, for example, to select a processor subtype.
    Also called when an option string is passed to set_processor_type()
    and to IDC's set_processor_type().

    args:
      options
      confidence - 0: loader's suggestion,
                   1: user's decision
    returns: <0 - bad option string; this implementation returns 0.
    """
    return 0
def notify_creating_segm(self, start_ea, segm_name, segm_class):
    """
    A new segment is about to be created.

    args:
      start_ea
      segm_name
      segm_class
    return >=0 - ok, <0 - segment should not be created; always 0 here.
    """
    return 0
def notify_auto_queue_empty(self, type):
    """
    One analysis queue is empty.

    args:
      type - atype_t of the queue
    This callback can be called many times, so only the auto_mark()
    functions should be used from it (others may work but are untested).
    This implementation just returns 1.
    """
    return 1
def notify_gen_regvar_def(self, ctx, canon, user, cmt):
    """
    Generate a register variable definition line.

    args:
      ctx   - output context
      canon - canonical register name (case-insensitive)
      user  - user-defined register name
      cmt   - comment to appear near the definition
    returns: >0 - ok; this stub returns 0.
    """
    return 0
def notify_setsgr(self, start_ea, end_ea, regnum, value, old_value, tag):
    """
    The kernel has changed a segment register value.

    args:
      start_ea
      end_ea
      regnum
      value
      old_value
      tag - uchar, one of the SR_... values
    returns: 0 - ok, <0 - error; always 0 here.
    """
    return 0
def notify_set_compiler(self):
    """
    The kernel has changed the compiler information; no action taken.
    """
    return None
def notify_is_basic_block_end(self, insn, call_insn_stops_block):
    """
    Is the instruction in `insn` the end of a basic block?

    Should be defined for processors with delayed jump slots.
    args:
      call_insn_stops_block
    returns: 0 - unknown, -1 - no, 1 - yes; always "unknown" here.
    """
    return 0
def notify_make_code(self, insn):
    """
    An instruction is being created.

    args:
      insn
    returns: 0 - ok, <0 - the kernel should stop; always 0 here.
    """
    return 0
def notify_make_data(self, ea, flags, tid, size):
    """
    A data item is being created.

    args:
      ea
      flags
      tid
      size
    returns: 0 - ok, <0 - the kernel should stop; always 0 here.
    """
    return 0
def notify_moving_segm(self, start_ea, segm_name, segm_class, to_ea, flags):
    """
    May the kernel move the segment?

    args:
      start_ea, segm_name, segm_class - segment to move
      to_ea - new segment start address
      flags - int, combination of MSF_... bits
    returns: 0 - yes, <0 - the kernel should stop; always 0 here.
    """
    return 0
def notify_move_segm(self, from_ea, start_ea, segm_name, segm_class, changed_netdelta):
    """
    A segment has been moved; a module would fix processor dependent,
    address sensitive information here. No action taken.

    args:
      from_ea - old segment address
      start_ea, segm_name, segm_class - moved segment
      changed_netdelta - whether the ea-to-netnode mapping has been changed
    returns: nothing
    """
    return None
def notify_verify_noreturn(self, func_start_ea):
    """
    The kernel wants to set the 'noreturn' flag for a function.

    args:
      func_start_ea
    Returns: 0 - ok, <0 - do not set the flag; always 0 here.
    """
    return 0
def notify_verify_sp(self, func_start_ea):
    """
    All instructions of the function have been analyzed; the module may now
    analyze the stack pointer for the whole function.

    args:
      func_start_ea
    Returns: 0 - ok, <0 - bad stack pointer; always 0 here.
    """
    return 0
def notify_renamed(self, ea, new_name, is_local_name):
    """
    The kernel has renamed a byte; no action taken.

    args:
      ea
      new_name
      is_local_name
    Returns: nothing. See also the 'rename' event.
    """
    return None
def notify_set_func_start(self, func_start_ea, func_end_ea, new_ea):
    """
    The start address of a function chunk is about to change.

    args:
      func_start_ea, func_end_ea
      new_ea
    Returns: 0 - ok, <0 - do not change; always 0 here.
    """
    return 0
def notify_set_func_end(self, func_start_ea, func_end_ea, new_end_ea):
    """
    The end address of a function chunk is about to change.

    args:
      func_start_ea, func_end_ea
      new_end_ea
    Returns: 0 - ok, <0 - do not change; always 0 here.
    """
    return 0
def notify_treat_hindering_item(self, hindering_item_ea, new_item_flags, new_item_ea, new_item_length):
    """
    An existing item hinders the creation of another item.

    args:
      hindering_item_ea
      new_item_flags
      new_item_ea
      new_item_length
    Returns: 0 - no reaction, <0 - the kernel may delete the hindering item;
             always 0 here.
    """
    return 0
def notify_get_operand_string(self, insn, opnum):
    """
    Request the text string for an operand (cli, java, ...).

    args:
      insn  - the instruction
      opnum - the operand number; -1 means any string operand
    Returns: the requested string; always empty here.
    """
    return ""
def notify_coagulate_dref(self, from_ea, to_ea, may_define, code_ea):
    """
    A data reference is being analyzed.

    args:
      from_ea, to_ea, may_define, code_ea
    A plugin may correct code_ea (e.g. thumb mode refs clear the last bit).
    Returns: new code_ea, or -1 to cancel the dref analysis;
             this stub returns 0.
    """
    return 0
# ---------------------------------------------------------------------- | |
# The following callbacks are mandatory | |
# | |
def notify_emu(self, insn):
    """
    Emulate an instruction: create cross-references and plan the analysis of
    the instructions that follow it.

    All information about the instruction is in `insn` on entry.

    Cross-references created:
      * itype_1000: jump xref to Op1.addr, and no fall-through flow
      * itype_2000: call xref to Op1.addr
      * itype_3000 / 4000 / 5000 / 9000: jump xref to the address two
        instruction sizes ahead (the instruction after the skipped one)
      * ordinary flow xref to the next instruction unless the instruction
        has CF_STOP or is itype_1000

    Returns 1; if zero were returned the kernel would delete the instruction.
    """
    # add_cref and the fl_* xref types belong to ida_xref, which none of the
    # file-level imports pulls in; reach them through `idaapi` instead.
    add_code_xref = idaapi.add_cref

    feature = insn.get_canon_feature()
    flow = (feature & CF_STOP) == 0

    # NOTE(review): every instruction flagged CF_JUMP is recorded in the
    # PR_JUMP problem list, including the direct jumps and skips handled
    # below - confirm this is intended.
    if feature & CF_JUMP:
        remember_problem(PR_JUMP, insn.ea)

    itype = insn.itype
    if itype == self.itype_1000:
        add_code_xref(insn.ea, insn.Op1.addr, idaapi.fl_JN)
        flow = False
    elif itype == self.itype_2000:
        add_code_xref(insn.ea, insn.Op1.addr, idaapi.fl_CN)
    elif itype in (self.itype_3000, self.itype_4000, self.itype_5000, self.itype_9000):
        # Conditional skip: the alternative successor lies past the next insn.
        add_code_xref(insn.ea, insn.ea + insn.size * 2, idaapi.fl_JN)

    if flow:
        add_code_xref(insn.ea, insn.ea + insn.size, idaapi.fl_F)
    return 1
def notify_out_operand(self, ctx, op):
    """
    Generate the text representation of an instruction operand.

    This function must not change the database, flags or anything else;
    such actions belong to the emulation callback.

    Rendering by operand type:
      o_reg          - the register name from self.reg_names
      o_imm          - the immediate value
      o_near / o_mem - a name expression for op.addr; when no name can be
                       produced, the raw address in hex, coloured as an
                       error, and the instruction is added to the PR_NONAME
                       problem list
      o_displ        - "[K]" when the FL_K bit is set in op.specval,
                       otherwise "[<register name>]"

    Returns: True - ok, False - operand is hidden (any other operand type).
    """
    optype = op.type
    if optype == o_reg:
        ctx.out_register(self.reg_names[op.reg])
    elif optype == o_imm:
        # 8 is the output-flags argument; presumably OOF_NUMBER - TODO confirm
        ctx.out_value(op, 8)
    elif optype in (o_near, o_mem):
        # BADADDR is reached through `idaapi`: no file-level star-import is
        # known to provide it as a bare name.
        if not ctx.out_name_expr(op, op.addr, idaapi.BADADDR):
            ctx.out_tagon(COLOR_ERROR)
            # The second argument of out_btoa is the radix, not a digit
            # count: print the unnamed address in hexadecimal.
            ctx.out_btoa(op.addr, 16)
            ctx.out_tagoff(COLOR_ERROR)
            remember_problem(PR_NONAME, ctx.insn.ea)
    elif optype == o_displ:
        ctx.out_symbol('[')
        if op.specval & self.FL_K == self.FL_K:
            ctx.out_char('K')
        else:
            ctx.out_register(self.reg_names[op.reg])
        ctx.out_symbol(']')
    else:
        return False
    return True
def notify_out_insn(self, ctx):
    """
    Generate the text representation of the instruction in `ctx.insn`.

    Prints the mnemonic and operand 0, then operands 1..3 separated by
    ", " until the first o_void operand, and finally flushes the line.
    This function must not change the database, flags or anything else.
    Returns: nothing
    """
    ctx.out_mnemonic()
    ctx.out_one_operand(0)

    idx = 1
    while idx < 4 and ctx.insn[idx].type != o_void:
        ctx.out_symbol(',')
        ctx.out_char(' ')
        ctx.out_one_operand(idx)
        idx += 1

    ctx.set_gen_cmt()
    ctx.flush_outbuf()
def notify_ana(self, insn):
    """
    Decode one instruction into `insn`.

    Reads the next word as the opcode, finds its definition with get_idef(),
    lets the definition's decoder fill in the operands, and sets insn.itype
    from the attribute named "itype_" plus the definition's opcode as four
    upper-case hex digits.

    Returns: insn.size (the size of the decoded instruction), or zero when
             no definition matches the opcode
    """
    opcode = insn.get_next_word()
    definition = self.get_idef(opcode)
    if definition is None:
        return 0
    definition.d(insn, opcode)
    insn.itype = getattr(self, "itype_%04X" % definition.opcode)
    return insn.size
def get_idef(self, opcode):
    """
    Find the instruction definition for `opcode`.

    Returns the first entry of self.itable whose `opcode` field equals
    `opcode` masked with the entry's `mask`, or None when nothing matches.
    Table order matters: earlier entries win.
    """
    matches = (entry for entry in self.itable if (opcode & entry.mask) == entry.opcode)
    return next(matches, None)
def init_registers(self):
    """
    Define the register table and the segment-register bookkeeping.

    Sets on the instance:
      reg_names      - V0..VF, then PC, I, DT, ST and the fake segment
                       registers CS and DS
      regs_num       - number of registers
      reg_first_sreg / reg_last_sreg - indexes of the first and last
                       segment registers (CS and DS)
      segreg_size    - size of a segment register in bytes (0)
      reg_code_sreg / reg_data_sreg - indexes of the CS and DS registers

    The segment-register indexes are derived from the name table. The
    previously hard-coded 19 and 20 were off by one and pointed at ST and CS.
    """
    general_purpose = ["V%X" % n for n in range(16)]  # V0 .. VF
    special = [
        "PC",  # Instruction pointer
        "I",   # Index register 12bit
        "DT",  # Delay timer
        "ST",  # Sound timer
    ]
    # The processor has no segment registers, so virtual CS and DS are used.
    fake_segment = ["CS", "DS"]

    self.reg_names = general_purpose + special + fake_segment
    # number of registers (optional: deduced from the len(reg_names))
    self.regs_num = len(self.reg_names)

    cs_index = self.reg_names.index("CS")
    ds_index = self.reg_names.index("DS")
    self.reg_first_sreg = cs_index
    self.reg_last_sreg = ds_index
    # size of a segment register in bytes
    self.segreg_size = 0
    # numbers of the CS/DS registers
    self.reg_code_sreg = cs_index
    self.reg_data_sreg = ds_index
def decode_OP(self, insn, opcode):
    """Decoder for instructions without operands: leaves `insn` untouched."""
    return None
def decode_XY(self, insn, opcode):
    """
    Decode two register operands.

    Op1 is the register numbered by bits 8-11 of `opcode`,
    Op2 the register numbered by bits 4-7.
    """
    first = (opcode & 0x0f00) >> 8
    second = (opcode & 0x00f0) >> 4
    for operand, regnum in ((insn.Op1, first), (insn.Op2, second)):
        operand.type = o_reg
        operand.reg = regnum
def decode_NNN_mem(self, insn, opcode):
    """Decode a single near-address operand: Op1.addr is the low 12 bits of `opcode`."""
    target = insn.Op1
    target.type = o_near
    target.dtype = dt_word
    target.addr = opcode & 0xfff
def decode_XNN(self, insn, opcode):
    """
    Decode a register and a byte immediate.

    Op1 is the register numbered by bits 8-11 of `opcode`,
    Op2 the immediate held in the low 8 bits.
    """
    reg_op = insn.Op1
    reg_op.type = o_reg
    reg_op.reg = (opcode & 0x0f00) >> 8

    imm_op = insn.Op2
    imm_op.type = o_imm
    imm_op.dtype = dt_byte
    imm_op.value = opcode & 0x00ff
def decode_NNN(self, insn, opcode):
    """
    Decode a single word-sized immediate operand.

    Op1.value is the low 12 bits of `opcode`.
    """
    insn.Op1.type = o_imm
    insn.Op1.dtype = dt_word
    # Was misspelled `isnn`, which raised NameError whenever this ran.
    insn.Op1.value = opcode & 0xfff
def decode_XYN(self, insn, opcode):
    """
    Decode two registers and a nibble immediate.

    Op1 and Op2 are filled in by decode_XY; Op3 is the immediate held in the
    low 4 bits of `opcode`.
    """
    self.decode_XY(insn, opcode)
    nibble = insn.Op3
    nibble.type = o_imm
    nibble.dtype = dt_byte
    nibble.value = opcode & 0xf
def decode_X(self, insn, opcode):
    """Decode a single register operand: Op1 is the register numbered by bits 8-11."""
    vx = insn.Op1
    vx.type = o_reg
    vx.reg = (opcode & 0x0f00) >> 8
def decode_LD_I(self, insn, opcode):
    """
    Decode "I, address".

    Op1 is the I register; Op2 is a near address taken from the low 12 bits
    of `opcode`.
    """
    dst = insn.Op1
    dst.type = o_reg
    dst.reg = self.reg_names.index("I")

    src = insn.Op2
    src.type = o_near
    src.dtype = dt_word
    src.addr = opcode & 0xfff
def decode_JP_V0(self, insn, opcode):
    """
    Decode "V0, immediate".

    Op1 is the V0 register; Op2 is a word immediate taken from the low
    12 bits of `opcode`.
    """
    base = insn.Op1
    base.type = o_reg
    base.reg = self.reg_names.index("V0")

    offset = insn.Op2
    offset.type = o_imm
    offset.dtype = dt_word
    offset.value = opcode & 0xfff
def decode_LD_K(self, insn, opcode):
    """
    Decode "VX, [K]".

    Op1 comes from decode_X; Op2 is an o_displ operand with the FL_K bit
    set in its specval, which notify_out_operand prints as "[K]".
    """
    self.decode_X(insn, opcode)
    key_op = insn.Op2
    key_op.type = o_displ
    key_op.specval |= self.FL_K
def decode_LD_VX_DT(self, insn, opcode):
    """
    Decode "VX, [DT]".

    Op1 comes from decode_X; Op2 is an o_displ operand on the DT register.
    """
    self.decode_X(insn, opcode)
    timer = insn.Op2
    timer.type = o_displ
    timer.reg = self.reg_names.index("DT")
def decode_LD_DT_VX(self, insn, opcode):
    """
    Decode "[DT], VX".

    Op1 is an o_displ operand on the DT register; Op2 is the register
    numbered by bits 8-11 of `opcode`.
    """
    timer = insn.Op1
    timer.type = o_displ
    timer.reg = self.reg_names.index("DT")

    vx = insn.Op2
    vx.type = o_reg
    vx.reg = (opcode & 0x0f00) >> 8
def decode_LD_ST(self, insn, opcode):
    """
    Decode the "ST, VX" operand pair.

    Op1 is typed o_displ and refers to the register named "ST" in
    self.reg_names; Op2 is register number X (bits 8..11 of the opcode).
    """
    timer_op = insn.Op1
    timer_op.type = o_displ
    timer_op.reg = self.reg_names.index("ST")

    reg_op = insn.Op2
    reg_op.type = o_reg
    reg_op.reg = (opcode >> 8) & 0xF
def decode_STORE_I(self, insn, opcode):
    """
    Decode the "[I], VX" operand pair.

    Op1 is typed o_displ and refers to the register named "I" in
    self.reg_names; Op2 is register number X (bits 8..11 of the opcode).
    """
    mem_op = insn.Op1
    mem_op.type = o_displ
    mem_op.reg = self.reg_names.index("I")

    reg_op = insn.Op2
    reg_op.type = o_reg
    reg_op.reg = (opcode >> 8) & 0xF
def decode_LOAD_I(self, insn, opcode):
    """
    Decode the "VX, [I]" operand pair.

    Op1 is register number X (bits 8..11 of the opcode); Op2 is typed
    o_displ and refers to the register named "I" in self.reg_names.
    """
    reg_op = insn.Op1
    reg_op.type = o_reg
    reg_op.reg = (opcode >> 8) & 0xF

    mem_op = insn.Op2
    mem_op.type = o_displ
    mem_op.reg = self.reg_names.index("I")
def decode_ADD_I(self, insn, opcode):
    """
    Decode the "I, VX" operand pair used by the ADD entry.

    Op1 is typed o_displ and refers to the register named "I" in
    self.reg_names; Op2 is register number X (bits 8..11 of the opcode).
    """
    index_op = insn.Op1
    index_op.type = o_displ
    index_op.reg = self.reg_names.index("I")

    reg_op = insn.Op2
    reg_op.type = o_reg
    reg_op.reg = (opcode >> 8) & 0xF
def init_instruction(self):
    """
    Build the CHIP-8 instruction table and the attributes derived from it.

    Sets on self:
      itable        -- list of idef entries (opcode pattern, mask, mnemonic,
                       feature flags, operand decoder, comment)
      itype_XXXX    -- one attribute per entry, named after the entry's
                       opcode pattern as four upper-case hex digits, holding
                       the entry's index in the table
      instruc       -- list of dicts (name / feature / optional cmt), in
                       table order
      instruc_start, instruc_end, icode_return
    """
    class idef:
        """One row of the instruction table."""
        def __init__(self, opcode, mask, name, cf, d, cmt = None):
            self.opcode = opcode  # bit pattern of the instruction
            self.mask = mask      # bits of the opcode that identify it
            self.name = name      # mnemonic
            self.cf = cf          # CF_* feature flags
            self.d = d            # operand decoder callback (insn, opcode)
            self.cmt = cmt        # optional descriptive comment

    # NOTE(review): the SYS entry (mask 0xf000) also covers the CLS and RET
    # patterns; this presumably relies on the decoder picking the first
    # matching entry, so keep CLS/RET ahead of SYS -- confirm against the
    # decoder, which is not part of this method.
    self.itable = [
        idef(opcode=0x00E0, mask=0xffff, name="CLS", d=self.decode_OP, cf=0, cmt="Clear video memory"),
        idef(opcode=0x00EE, mask=0xffff, name="RET", d=self.decode_OP, cf=CF_STOP, cmt="Return from subroutine"),
        idef(opcode=0x0000, mask=0xf000, name="SYS", d=self.decode_NNN_mem, cf=CF_USE1, cmt="Call CDP1802 subroutine at NNN"),
        idef(opcode=0x1000, mask=0xf000, name="JP", d=self.decode_NNN_mem, cf=CF_USE1|CF_JUMP, cmt="Jump to address NNN"),
        idef(opcode=0x2000, mask=0xf000, name="CALL", d=self.decode_NNN_mem, cf=CF_USE1|CF_CALL, cmt="Call CHIP-8 subroutine at NNN"),
        idef(opcode=0x3000, mask=0xf000, name="SE", d=self.decode_XNN, cf=CF_USE1|CF_USE2|CF_JUMP, cmt="Skip next instruction if VX == NN"),
        idef(opcode=0x4000, mask=0xf000, name="SNE", d=self.decode_XNN, cf=CF_USE1|CF_USE2|CF_JUMP, cmt="Skip next instruction if VX != NN"),
        idef(opcode=0x5000, mask=0xf00f, name="SE", d=self.decode_XY, cf=CF_USE1|CF_USE2|CF_JUMP, cmt="Skip next instruction if VX == VY"),
        idef(opcode=0x6000, mask=0xf000, name="LD", d=self.decode_XNN, cf=CF_USE1|CF_CHG1|CF_USE2, cmt="VX = NN"),
        idef(opcode=0x7000, mask=0xf000, name="ADD", d=self.decode_XNN, cf=CF_USE1|CF_CHG1|CF_USE2, cmt="VX = VX + NN"),
        idef(opcode=0x8000, mask=0xf00f, name="LD", d=self.decode_XY, cf=CF_USE1|CF_CHG1|CF_USE2, cmt="VX = VY"),
        idef(opcode=0x8001, mask=0xf00f, name="OR", d=self.decode_XY, cf=CF_USE1|CF_CHG1|CF_USE2, cmt="VX = VX OR VY"),
        idef(opcode=0x8002, mask=0xf00f, name="AND", d=self.decode_XY, cf=CF_USE1|CF_CHG1|CF_USE2, cmt="VX = VX AND VY"),
        idef(opcode=0x8003, mask=0xf00f, name="XOR", d=self.decode_XY, cf=CF_USE1|CF_CHG1|CF_USE2, cmt="VX = VX XOR VY"),
        idef(opcode=0x8004, mask=0xf00f, name="ADD", d=self.decode_XY, cf=CF_USE1|CF_CHG1|CF_USE2, cmt="VX = VX + VY; VF = 1 if overflow else 0"),
        idef(opcode=0x8005, mask=0xf00f, name="SUB", d=self.decode_XY, cf=CF_USE1|CF_CHG1|CF_USE2, cmt="VX = VX - VY; VF = 1 if not borrow else 0"),
        idef(opcode=0x8006, mask=0xf00f, name="SHR", d=self.decode_XY, cf=CF_USE1|CF_CHG1|CF_USE2, cmt="VF = LSB(VX); VX = VX » 1 (** see note)"),
        idef(opcode=0x8007, mask=0xf00f, name="SUBN", d=self.decode_XY, cf=CF_USE1|CF_CHG1|CF_USE2, cmt="VX = VY - VX; VF = 1 if not borrow else 0"),
        idef(opcode=0x800E, mask=0xf00f, name="SHL", d=self.decode_XY, cf=CF_USE1|CF_CHG1|CF_USE2, cmt="VF = MSB(VX); VX = VX « 1 (** see note)"),
        idef(opcode=0x9000, mask=0xf00f, name="SNE", d=self.decode_XY, cf=CF_USE1|CF_USE2|CF_JUMP, cmt="Skip next instruction if VX != VY"),
        idef(opcode=0xA000, mask=0xf000, name="LD", d=self.decode_LD_I, cf=CF_USE1, cmt="I = NNN"),
        idef(opcode=0xB000, mask=0xf000, name="JP", d=self.decode_JP_V0, cf=CF_USE1|CF_JUMP, cmt="Jump to address NNN + V0"),
        # CXNN carries a register and an 8-bit immediate (see its comment),
        # so it needs the XNN decoder; decode_XY would present the immediate
        # as a second register and discard its low nibble.
        idef(opcode=0xC000, mask=0xf000, name="RND", d=self.decode_XNN, cf=CF_USE1|CF_CHG1|CF_USE2, cmt="VX = RND() AND NN"),
        idef(opcode=0xD000, mask=0xf000, name="DRW", d=self.decode_XYN, cf=CF_USE1|CF_USE2|CF_USE3, cmt="Draw 8xN sprite at I to VX, VY; VF = 1 if collision else 0"),
        idef(opcode=0xE09E, mask=0xf0ff, name="SKP", d=self.decode_X, cf=CF_USE1|CF_JUMP, cmt="Skip next instruction if key(VX) is pressed"),
        idef(opcode=0xE0A1, mask=0xf0ff, name="SKNP", d=self.decode_X, cf=CF_USE1|CF_JUMP, cmt="Skip next instruction if key(VX) is not pressed"),
        idef(opcode=0xF007, mask=0xf0ff, name="LD", d=self.decode_LD_VX_DT, cf=CF_USE1|CF_CHG1, cmt="LD VX, DT;Sets VX to the value of the delay timer."),
        idef(opcode=0xF00A, mask=0xf0ff, name="LD", d=self.decode_LD_K, cf=CF_USE1|CF_CHG1, cmt="Wait for key press, store key pressed in VX"),
        idef(opcode=0xF015, mask=0xf0ff, name="LD", d=self.decode_LD_DT_VX, cf=CF_USE1, cmt="DT = VX;Sets the delay timer to VX."),
        idef(opcode=0xF018, mask=0xf0ff, name="LD", d=self.decode_LD_ST, cf=CF_USE1, cmt="ST = VX;Sets the sound timer to VX."),
        idef(opcode=0xF01E, mask=0xf0ff, name="ADD", d=self.decode_ADD_I, cf=CF_USE1, cmt="I = I + VX; VF = 1 if I > 0xFFF else 0"),
        # NOTE(review): this entry has no operand decoder (d=None); confirm
        # that whatever invokes entry.d tolerates None, or wire one up.
        idef(opcode=0xF029, mask=0xf0ff, name="LD", d=None, cf=CF_USE1, cmt="I = address of 4x5 font character in VX (0..F) (* see note)"),
        idef(opcode=0xF033, mask=0xf0ff, name="BCD", d=self.decode_X, cf=CF_USE1, cmt="set_BCD(Vx);*(I+0)=BCD(3);*(I+1)=BCD(2);*(I+2)=BCD(1);Store BCD representation of VX at I (100), I+1 (10), and I+2 (1); I remains unchanged"),
        idef(opcode=0xF055, mask=0xf0ff, name="LD", d=self.decode_STORE_I, cf=CF_USE1, cmt="Store V0..VX (inclusive) to memory starting at I; I remains unchanged"),
        idef(opcode=0xF065, mask=0xf0ff, name="LD", d=self.decode_LOAD_I, cf=CF_USE1, cmt="Load V0..VX (inclusive) from memory starting at I; I remains unchanged")
    ]

    Instructions = []
    for icode, entry in enumerate(self.itable):
        desc = dict(name=entry.name, feature=entry.cf)
        if entry.cmt is not None:
            desc['cmt'] = entry.cmt
        Instructions.append(desc)
        # Expose the entry's index as e.g. self.itype_00EE.
        setattr(self, 'itype_' + ("%04X" % entry.opcode), icode)

    # icode of the first instruction
    self.instruc_start = 0
    # icode of the last instruction + 1
    self.instruc_end = len(Instructions) + 1
    # Array of instructions
    self.instruc = Instructions
    # Icode of return instruction. It is ok to give any of possible return
    # instructions
    self.icode_return = self.itype_00EE
def __init__(self):
    """
    Initialise the base processor object, then the register and
    instruction tables of this module.
    """
    # The base class must be initialised explicitly; the original skipped
    # this call, leaving the idaapi.processor_t part of the object unset.
    idaapi.processor_t.__init__(self)
    self.init_registers()
    self.init_instruction()
# ---------------------------------------------------------------------- | |
# Every processor module script must provide this function. | |
# It should return a new instance of a class derived from idaapi.processor_t | |
def PROCESSOR_ENTRY():
    """Create and return a new chip8_processor_t instance."""
    module = chip8_processor_t()
    return module
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment