Created
December 7, 2021 04:17
-
-
Save terrynini/36d560ad61cbec449e731f0e00dcea7d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from idaapi import * | |
import os | |
import sys | |
import re | |
from clemency_inst import inst_json | |
######################################## | |
# Decoder Function | |
######################################## | |
def is_bit_string(strg, search=re.compile(r'[^01]').search):
    """Report whether ``strg`` consists only of the characters '0' and '1'.

    ``search`` is the bound ``search`` method of a pattern matching any
    character other than '0' or '1'; being a default argument, the pattern
    is compiled once when the function is defined.  A string with no such
    character (including the empty string) yields True.
    """
    if search(strg):
        return False
    return True
def SIGNEXT(x, b):
    """Interpret the low ``b`` bits of ``x`` as a two's-complement number.

    Bits of ``x`` above bit ``b - 1`` are discarded before the conversion.
    """
    sign_bit = 1 << (b - 1)
    # Keep bits 0 .. b-1 only.
    low_bits = x & ((sign_bit << 1) - 1)
    # Flipping the sign bit and subtracting it maps [2^(b-1), 2^b) onto the
    # negative range and leaves smaller values unchanged.
    return (low_bits ^ sign_bit) - sign_bit
def fetch(code, n):
    """Extract an ``n``-bit instruction word from a 54-bit fetch window.

    ``code`` is treated as six 9-bit units, unit 1 being the most
    significant.  The units are re-assembled, most significant first, in
    the order listed below for each supported width:

        18 bits: 2, 1
        27 bits: 2, 1, 3
        36 bits: 2, 1, 3, 4
        54 bits: 2, 1, 3, 5, 4, 6

    Returns the re-assembled integer, or None when ``n`` is not one of the
    widths above.
    """
    # units[k - 1] is the k-th 9-bit unit of the window.
    units = [(code >> (54 - 9 * k)) & 0x1ff for k in range(1, 7)]
    orders = {
        18: (2, 1),
        27: (2, 1, 3),
        36: (2, 1, 3, 4),
        54: (2, 1, 3, 5, 4, 6),
    }
    order = orders.get(n)
    if order is None:
        return None
    word = 0
    for k in order:
        word = (word << 9) + units[k - 1]
    return word
def calc_jump_addr(self, op, insn):
    """Compute the target address of a branch/call operand.

    ``self`` supplies the ``itype_*`` instruction codes, ``op.addr`` is the
    raw operand value and ``insn.ea`` the instruction address.

    * BRA / CAA: ``op.addr`` is returned unchanged.
    * C / B: ``op.addr`` is a 17-bit signed displacement from ``insn.ea``.
    * CAR / BRR: ``op.addr`` is a 27-bit signed displacement from ``insn.ea``.
    * anything else: ``(op.addr + insn.ea)`` truncated to 27 bits.
    """
    target = op.addr
    kind = insn.itype
    if kind == self.itype_BRA or kind == self.itype_CAA:
        return target

    if kind == self.itype_C or kind == self.itype_B:
        sign_bit, mask = 0x10000, 0x1ffff
    elif kind == self.itype_CAR or kind == self.itype_BRR:
        sign_bit, mask = 0x4000000, 0x7ffffff
    else:
        return (target + insn.ea) & 0x7ffffff

    if target & sign_bit != 0:
        # Negative displacement: subtract its two's-complement magnitude.
        return insn.ea - ((~target & mask) + 1)
    return target + insn.ea
######################################## | |
# Processor Type | |
######################################## | |
def ana_ops(self, ops, insn):
    """Fill the operands and ``auxpref`` of ``insn`` from decoded fields.

    ``self.itable[insn.itype].args`` lists the instruction's fields as
    ``(width, value)`` pairs.  Fields whose value starts with '0' or '1'
    are constant bit patterns and are skipped; every other field consumes
    the next entry of ``ops`` (the extracted field values, in field order).

    ``auxpref`` layout written here: bits 0-3 condition code (0xF when the
    instruction has no 'Condition' field), bit 4 the 'UF' field, bits 5 and
    up the 'Adjust rB' field.
    """
    field_no = 0         # index of the current non-constant field in ops
    slot = 0             # next free operand slot of insn
    saw_condition = False

    for _width, kind in self.itable[insn.itype].args:
        if kind[0] in ('0', '1'):
            continue

        if kind in ('rA', 'rB', 'rC'):
            operand = insn[slot]
            operand.type = o_reg
            operand.dtype = dt_dword
            operand.reg = ops[field_no]
            slot += 1
        elif kind == 'Immediate':
            operand = insn[slot]
            operand.type = o_imm
            operand.dtype = dt_dword
            operand.value = ops[field_no]
            slot += 1
        elif kind in ('Location', 'Offset'):
            operand = insn[slot]
            operand.type = o_near
            operand.dtype = dt_dword
            operand.addr = ops[field_no]
            slot += 1
        elif kind == 'Register Count':
            # Rewrites the previously emitted operand into a displacement
            # operand: base register from the preceding field, register
            # count in specval, and the field two positions further on as
            # a 27-bit signed displacement.
            operand = insn[slot - 1]
            operand.type = o_displ
            operand.dtype = dt_dword
            operand.specval = ops[field_no]
            operand.phrase = ops[field_no - 1]
            displacement = ops[field_no + 2]
            if displacement & 0x4000000:
                displacement -= 0x8000000
            operand.value = displacement
        elif kind == 'Adjust rB':
            insn.auxpref |= ops[field_no] << 5
        elif kind == 'UF':
            insn.auxpref |= ops[field_no] << 4
        elif kind == 'Condition':
            insn.auxpref |= ops[field_no]
            saw_condition = True
        elif kind == 'Memory Flags':
            operand = insn[slot]
            operand.type = o_idpspec0
            operand.dtype = dt_dword
            operand.specval = ops[field_no]
            slot += 1
        elif kind == 'Memory Offset':
            # Consumed by the 'Register Count' branch above.
            pass
        else:
            assert False
        field_no += 1

    if not saw_condition:
        insn.auxpref |= 0xF
def ana(self, insn):
    """Decode the instruction at ``insn.ea`` into ``insn``.

    Six 9-bit bytes are read starting at ``insn.ea + insn.size`` and
    matched against every group in ``self.imatch``; the first group whose
    constant-field values are known selects the instruction type.

    Returns the instruction length in bytes, or 0 when nothing matches.
    """
    start = insn.ea + insn.size
    # Pack six 9-bit bytes into one integer, first byte most significant.
    code = 0
    for i in range(6):
        code = (code << 9) | (get_wide_byte(start + i) & 0x1ff)

    idx = None
    for group in self.imatch:
        bitlen, masks = group
        ops, imap = self.imatch[group]
        code2 = fetch(code, bitlen)
        bits = tuple((code2 & mask) >> shift for mask, shift in masks)
        if bits in imap:
            idx = imap[bits]
            insn.itype = idx
            break
    if idx is None:
        return 0

    ana_ops(self, [(code2 & mask) >> shift for mask, shift in ops], insn)

    # Remove this block to disable simplifying ML+MH
    if insn.itype == self.itype_MH and insn.ea >= 3:
        prev_ea = insn.ea - 3
        # Re-assemble the three bytes before this instruction in the
        # order: second byte, first byte, third byte.
        high = get_wide_byte(prev_ea + 1) & 0x1ff
        mid = get_wide_byte(prev_ea) & 0x1ff
        low = get_wide_byte(prev_ea + 2) & 0x1ff
        prev = (high << 18) | (mid << 9) | low
        if (prev >> 22) & 0x1f == 0x12:
            regidx = (prev >> 17) & 0x1f
            lo = prev & 0x1ffff
            if regidx == insn[0].reg:
                # Same destination register: fold both halves into one
                # value and present the pair as the pseudo-instruction MEH.
                hi = insn[1].value
                insn[1].value = (hi << 10) | (lo & 0x3ff)
                insn.itype = self.itype_MEH

    # ms 0x1ffff -> ms -1
    if insn.itype == self.itype_MS and (insn[1].value & 0x10000):
        insn[1].value -= 0x20000

    bytelen = bitlen // 9
    insn.size += bytelen
    return bytelen
def add_stkpnt(self, pfn, v):
    """Record a stack-pointer change of ``v`` right after ``self.cmd``.

    Does nothing when ``pfn`` is falsy or when the address following the
    current instruction already has a fixed stack-pointer delta.
    """
    if not pfn:
        return
    after = self.cmd.ea + self.cmd.size
    if is_fixed_spd(after):
        return
    add_auto_stkpnt2(pfn, after, v)
    # Debug trace of the stack point that was just added.
    print(hex(after), v)
def trace_sp(self):
    """Track stack-pointer changes made by the instruction in ``self.cmd``.

    Only ``SBI ST, ST, imm`` and ``ADI ST, ST, imm`` are recognised; the
    third operand is taken as a 7-bit signed value, negated for SBI.
    Nothing is done when the instruction does not belong to a function.
    """
    cmd = self.cmd
    pfn = get_func(cmd.ea)
    if not pfn:
        return
    if not (cmd.Op1.type == o_reg and cmd.Op1.reg == self.ireg_ST):
        return
    if not (cmd.Op2.type == o_reg and cmd.Op2.reg == self.ireg_ST):
        return
    if cmd.itype == self.itype_SBI:
        add_stkpnt(self, pfn, -SIGNEXT(cmd.Op3.value, 7))
    elif cmd.itype == self.itype_ADI:
        add_stkpnt(self, pfn, SIGNEXT(cmd.Op3.value, 7))
def emu(self, insn):
    """Create the cross-references for ``insn``.

    * B / BR / BRA / BRR: jump xref to the target (not for BR), plus a
      fall-through xref unless the instruction is B or BR with condition
      code 0xF.
    * C / CR / CAR / CAA: call xref to the target (not for CR), plus a
      fall-through xref.
    * RE / HT: no outgoing flow.
    * everything else: fall-through xref.
    * MEH: data xref to the merged immediate; when the three 9-bit bytes
      there are all in 0x20..0x7f, the custom 9-bit string item is created.

    All xrefs originate from ``insn.ea``: the module-level ``add_cref`` and
    ``add_dref`` take ``(frm, to, type)``.

    Always returns True.
    """
    aux = self.get_auxpref(insn)
    here = insn.ea
    next_ea = here + insn.size
    flow = False

    if insn.itype in (self.itype_B, self.itype_BR, self.itype_BRA, self.itype_BRR):
        if insn.itype != self.itype_BR:
            add_cref(here, calc_jump_addr(self, insn.Op1, insn), fl_JN)
        # NOTE(review): BRA/BRR always get a fall-through xref here even
        # though they carry no condition field -- confirm this is intended.
        if insn.itype not in (self.itype_B, self.itype_BR) or (aux & 0xF) != 0xF:
            flow = True
    elif insn.itype in (self.itype_C, self.itype_CR, self.itype_CAR, self.itype_CAA):
        if insn.itype != self.itype_CR:
            add_cref(here, calc_jump_addr(self, insn.Op1, insn), fl_CN)
        add_cref(here, next_ea, fl_F)
    elif insn.itype in (self.itype_RE, self.itype_HT):
        pass
    else:
        flow = True

    if flow:
        add_cref(here, next_ea, fl_F)

    if insn.itype == self.itype_MEH:
        target = insn[1].value
        add_dref(here, target, dr_R)
        head = [get_wide_byte(target + i) & 0x1ff for i in range(3)]
        if all(0x20 <= c <= 0x7f for c in head):
            # Equivalent of the former
            # MakeCustomDataEx(target, 0, self.nstr_dtid, self.nstr_dfid):
            # a custom item packs the type id in the low 16 bits of the tid
            # and the format id in the high 16 bits; size 0 leaves the
            # length to the data type's calc_item_size.
            create_data(target, FF_CUSTOM, 0,
                        self.nstr_dtid | ((self.nstr_dfid & 0xFFFF) << 16))

    # Stack-pointer tracing (trace_sp / recalc_spd) is currently disabled;
    # the original call was commented out.
    return True
def _out_unresolved(ctx, op):
    """Print ``op`` as an error-coloured raw address and log a no-name problem."""
    ctx.out_tagon(COLOR_ERROR)
    ctx.out_value(op, OOF_ADDR)
    ctx.out_tagoff(COLOR_ERROR)
    remember_problem(Q_noName, ctx.insn.ea)


def _out_near(self, ctx, op):
    """Print a code-address operand (type ``o_near``).

    BRA/CAA operands are printed as a name only.  Every other instruction
    prints ``+off (name)`` or ``-off (name)``, where ``off`` is the
    magnitude of the displacement in hex and ``name`` the resolved target.
    """
    insn = ctx.insn
    addr = op.addr

    # location
    if insn.itype == self.itype_BRA or insn.itype == self.itype_CAA:
        if not ctx.out_name_expr(op, addr, BADADDR):
            _out_unresolved(ctx, op)
        return

    # offset
    if insn.itype == self.itype_C or insn.itype == self.itype_B:
        sign_bit, mask = 0x10000, 0x1ffff          # 17-bit signed
    elif insn.itype == self.itype_CAR or insn.itype == self.itype_BRR:
        sign_bit, mask = 0x4000000, 0x7ffffff      # 27-bit signed
    else:
        sign_bit, mask = None, 0x7ffffff

    if sign_bit is None:
        sign = '+'
        off = addr
        target = (addr + insn.ea) & mask
    elif addr & sign_bit != 0:
        # sign extend
        sign = '-'
        off = (~addr & mask) + 1
        target = insn.ea - off
    else:
        sign = '+'
        off = addr
        target = addr + insn.ea

    ctx.out_symbol(sign)
    ctx.out_btoa(off, 16)
    ctx.out_symbol(' ')
    ctx.out_symbol('(')
    if not ctx.out_name_expr(op, target, BADADDR):
        _out_unresolved(ctx, op)
    ctx.out_symbol(')')


def outop(self, ctx, op):
    """Print one operand of ``ctx.insn`` through the output context ``ctx``.

    Handles register, memory-flag (``o_idpspec0``), immediate, code-address
    and displacement operands; any other operand type prints nothing.
    Always returns True.
    """
    optype = op.type
    if optype == o_reg:
        ctx.out_register(self.reg_names[op.reg])
    elif optype == o_idpspec0:
        # Memory-flag operand: 0..3 have their own letters, anything else
        # is printed as 'ERR'.
        letters = {0: 'N', 1: 'R', 2: 'RW', 3: 'E'}.get(op.specval, 'ERR')
        for letter in letters:
            ctx.out_symbol(letter)
    elif optype == o_imm:
        ctx.out_value(op, OOFW_32 | OOF_SIGNED)
    elif optype == o_near:
        _out_near(self, ctx, op)
    elif optype == o_displ:
        # [base+disp, count]; specval holds the register count minus one.
        ctx.out_symbol('[')
        ctx.out_register(self.reg_names[op.phrase])
        ctx.out_value(op, OOFW_32 | OOFS_NEEDSIGN | OOF_SIGNED)
        ctx.out_symbol(',')
        ctx.out_symbol(' ')
        ctx.out_line("%d" % (op.specval + 1))
        ctx.out_symbol(']')
    return True
class CLEMENCY(processor_t):
    """IDA processor module description for the cLEMENCy CPU.

    Decoding, emulation and operand output are implemented by the
    module-level functions ``ana``, ``emu`` and ``outop`` of the
    ``clemency`` module; the ``ev_*`` handlers look them up on every call
    so that, when the ``IDA_RELOAD`` environment variable is set, the
    module can be reloaded on the fly.
    """

    # IDP id ( Numbers above 0x8000 are reserved for the third-party modules)
    id = 0x8000 + 0x999

    # Processor features
    flag = PR_ADJSEGS | PRN_HEX | PR_WORD_INS

    # Number of bits in a byte for code segments (usually 8)
    # IDA supports values up to 32 bits
    cnbits = 9

    # Number of bits in a byte for non-code segments (usually 8)
    # IDA supports values up to 32 bits
    dnbits = 9

    # short processor names (NULL terminated)
    # Each name should be shorter than 9 characters
    psnames = ["clemency"]

    # long processor names (NULL terminated)
    # No restriction on name lengths.
    plnames = ["cLEMENCy"]

    segreg_size = 0

    ##########################
    # instruction
    # icode of the first instruction
    instruc_start = 0

    assembler = {
        'flag': ASH_HEXF3 | ASD_DECF0 | ASO_OCTF1 | ASB_BINF3 | AS_ASCIIC,  # | AS_ASCIIZ
        "uflag": 0,
        "name": "GNU assembler",
        "origin": ".org",
        "end": "end",
        "cmnt": ";",
        "ascsep": '"',
        "accsep": "'",
        "esccodes": "\"'",
        "a_ascii": ".ascii",
        "a_byte": ".byte",
        "a_word": ".word",
        "a_3byte": ".tribyte",
        "a_bss": "dfs %s",
        "a_seg": "seg",
        "a_curip": ".",
        "a_public": "",
        "a_weak": "",
        "a_extrn": ".extrn",
        "a_comdef": "",
        "a_align": ".align",
        "lbrace": "(",
        "rbrace": ")",
        "a_mod": "%",
        "a_band": "&",
        "a_bor": "|",
        "a_xor": "^",
        "a_bnot": "~",
        "a_shl": "<<",
        "a_shr": ">>",
        "a_sizeof_fmt": "size %s",
    }

    # flag for auxpref
    FL_UF = 0x0010
    FL_CC = 0x000F
    FL_ADJUST = 0x0060

    # Module object whose ana/emu/outop functions the ev_* handlers call.
    module = __import__('clemency')

    # Mnemonic suffix for each 4-bit condition code.
    cc_table = [
        'n',
        'e',
        'l',
        'le',
        'g',
        'ge',
        'no',
        'o',
        'ns',
        's',
        'sl',
        'sle',
        'sg',
        'sge',
        '',
        '',
    ]

    def __init__(self):
        """Register the custom data formats and build the register and
        instruction tables."""
        processor_t.__init__(self)
        # new data format
        self.init_data_format()
        # reload debug flag
        self.doReload = os.getenv('IDA_RELOAD')
        # init
        self._init_registers()
        self._init_instructions()

    def init_data_format(self):
        """Register the tribyte and 9-bit string custom data types/formats
        and remember the ids returned by IDA."""
        self.tribyte_dtid = register_custom_data_type(tribyte_data_type())
        self.tribyte_dfid = register_custom_data_format(tribyte_data_format())
        self.nstr_dtid = register_custom_data_type(nbit_str_data_type())
        self.nstr_dfid = register_custom_data_format(nbit_str_data_format())

    def _init_registers(self):
        """Define the register names and the matching ``ireg_*`` constants."""
        # Registers definition
        self.reg_names = (["R%02d" % (i) for i in range(29)]
                          + ["ST", "RA", "PC", "FL"]
                          + ["CS", "DS"])
        # Create the ireg_XXXX constants
        for number, reg_name in enumerate(self.reg_names):
            setattr(self, 'ireg_' + reg_name, number)
        # Set fake segment registers
        self.reg_first_sreg = self.reg_code_sreg = self.ireg_CS
        self.reg_last_sreg = self.reg_data_sreg = self.ireg_DS

    def _init_instructions(self):
        """Build the decode tables and the IDA instruction list from ``inst_json``.

        * ``self.itable[j]``: definition of instruction ``j``.
        * ``self.imatch``: maps ``(total_bits, constant_field_masks)`` to
          ``(operand_field_masks, {constant_values: j})``.  Each mask entry
          is a ``(bitmask, shift)`` pair.
        * ``self.instruc`` and the ``itype_*`` constants, including the
          extra pseudo-instruction ``meh``.
        """
        class idef:
            def __init__(self, name, cmt, fmt, cf, args):
                self.name = name
                self.cmt = cmt
                self.fmt = fmt
                self.cf = cf
                self.args = args

        self.itable = {}
        self.imatch = {}
        for j, i in enumerate(inst_json):
            args = [(a['width'], a['value']) for a in i['args']]
            # Set itable entry for instruction #j
            self.itable[j] = idef(i['name'], i['desc'], i['format'], i['feature'], args)

            # Generate matching table entry.  Fields are laid out from the
            # most significant bit down; constant fields (value starting
            # with '0'/'1') go into the match masks, the others become
            # operand masks.
            ws = sum(w for w, v in args)
            off = 0
            masks = []
            vals = []
            ops = []
            for w, v in args:
                field = ((1 << (ws - off)) - (1 << (ws - off - w)), ws - off - w)
                if v[0] in '01':
                    masks.append(field)
                    vals.append(int(v, 2))
                else:
                    ops.append(field)
                off += w
            grp = (ws, tuple(masks))
            if grp not in self.imatch:
                # (operand mask, inst match table); the operand masks of
                # the first instruction seen in a group are used for the
                # whole group.
                self.imatch[grp] = (tuple(ops), {})
            self.imatch[grp][1][tuple(vals)] = j

        Instructions = []
        for j in range(len(self.itable)):
            x = self.itable[j]
            d = dict(name=x.name.lower(), feature=x.cf)
            if x.cmt:
                d['cmt'] = x.cmt
            Instructions.append(d)
            setattr(self, 'itype_' + x.name, j)
        # Pseudo-instruction produced by ana() when it merges ML+MH.
        setattr(self, 'itype_MEH', len(Instructions))
        Instructions.append(dict(name='meh', feature=0))
        self.instruc_end = len(Instructions) + 1
        self.instruc = Instructions
        self.icode_return = self.itype_RE

    def _module_func(self, name):
        """Return function ``name`` of ``self.module``, reloading the module
        first when hot reload was requested through ``IDA_RELOAD``."""
        if self.doReload:
            try:
                # Python 3: reload is no longer a builtin.
                from importlib import reload as reload_module
            except ImportError:
                reload_module = reload  # Python 2 builtin
            reload_module(self.module)
        return getattr(self.module, name)

    def ev_ana_insn(self, insn):
        """Decode one instruction; delegates to the module-level ``ana``."""
        return self._module_func('ana')(self, insn)

    def ev_emu_insn(self, insn):
        """Emulate one instruction; delegates to the module-level ``emu``."""
        return self._module_func('emu')(self, insn)

    def ev_out_insn(self, ctx):
        """Print the mnemonic (with its suffixes) and all operands."""
        postfix = ''
        auxpref = ctx.insn.auxpref

        # Adjust Register
        # e.g., LDSI, LDSD
        adjust_flag = (auxpref & self.FL_ADJUST) >> 5
        if adjust_flag == 1:
            postfix += 'i'
        elif adjust_flag == 2:
            postfix += 'd'

        # Conditional
        # e.g., Bge
        cc_idx = auxpref & self.FL_CC
        if cc_idx != 0xf:
            postfix += self.cc_table[cc_idx]

        # Update Flag
        # e.g., ad.
        if auxpref & self.FL_UF != 0:
            postfix += '.'

        ctx.out_mnem(12, postfix)
        for i in range(6):
            op = ctx.insn[i]
            if op.type == o_void:
                break
            if i > 0:
                ctx.out_symbol(',')
                ctx.out_char(' ')
            ctx.out_one_operand(i)
        cvar.gl_comm = 1
        ctx.flush_outbuf()
        return True

    def ev_out_operand(self, ctx, op):
        """Print one operand; delegates to the module-level ``outop``."""
        return self._module_func('outop')(self, ctx, op)
######################################## | |
# Data format for TriBytes (9bits) | |
######################################## | |
class tribyte_data_type(data_type_t):
    """Custom IDA data type "py_tribyte": an item spanning three bytes."""

    ASM_KEYWORD = ".tri"

    def __init__(self):
        # Positional arguments passed to data_type_t.__init__ after self.
        # NOTE(review): presumably name, value size, menu name, hotkey and
        # assembler keyword -- confirm against the data_type_t signature.
        init_args = (
            "py_tribyte",
            1,
            "TriBytes (9bits)",
            'z',
            tribyte_data_type.ASM_KEYWORD,
        )
        data_type_t.__init__(self, *init_args)

    def calc_item_size(self, ea, maxsize):
        """Every tribyte item is three bytes long, whatever ``ea``/``maxsize``."""
        return 3
class tribyte_data_format(data_format_t):
    """Display format "py_tribyte_format" for three-byte items."""

    def __init__(self):
        data_format_t.__init__(self,
                               "py_tribyte_format",
                               0,
                               "TriBytes (9bits)")

    def printf(self, value, current_ea, operand_num, dtid):
        """Return the hex text of the 27-bit value stored at ``current_ea``.

        The three 9-bit bytes are read from the database (``value`` is not
        used) and combined as second byte, first byte, third byte, from
        most to least significant.
        """
        # get_wide_byte comes from ``from idaapi import *``; this file does
        # not bind the name ``idaapi`` itself.
        b1 = get_wide_byte(current_ea) & 0x1ff
        b2 = get_wide_byte(current_ea + 1) & 0x1ff
        b3 = get_wide_byte(current_ea + 2) & 0x1ff
        return hex((b2 << 18) + (b1 << 9) + b3)
class nbit_str_data_type(data_type_t):
    """Custom IDA data type "py_str": a string stored one character per
    9-bit byte."""

    ASM_KEYWORD = ".str"

    # Control characters accepted inside a string: CR, LF, TAB, ESC.
    _CONTROL_CHARS = (0x0d, 0x0a, 0x09, 0x1b)

    def __init__(self):
        data_type_t.__init__(self,
                             "py_str",
                             1,
                             "String (9bits)",
                             ',',
                             nbit_str_data_type.ASM_KEYWORD)

    def calc_item_size(self, ea, maxsize):
        """Return the length of the string at ``ea`` plus one.

        The string runs while each 9-bit byte is in 0x20..0x7f or is one of
        CR, LF, TAB, ESC; the extra byte counted is the one that ended the
        scan.  ``maxsize`` is not consulted.
        """
        length = 0
        while True:
            # get_wide_byte comes from ``from idaapi import *``; this file
            # does not bind the name ``idaapi`` itself.
            c = get_wide_byte(ea + length) & 0x1ff
            if not (0x20 <= c <= 0x7f) and c not in self._CONTROL_CHARS:
                break
            length += 1
        return length + 1
class nbit_str_data_format(data_format_t):
    """Display format "py_str_format" for 9-bit strings."""

    # Printed replacements for the control characters a string may contain.
    _ESCAPES = {
        0x0d: '\\r',
        0x0a: '\\n',
        0x09: '\\t',
        0x1b: '^[',
    }

    def __init__(self):
        data_format_t.__init__(self,
                               "py_str_format",
                               0,
                               "String (9bits) format")

    def printf(self, value, current_ea, operand_num, dtid):
        """Return the item at ``current_ea`` as ``"text", 0``.

        ``len(value) - 1`` characters are read from the database (the last
        byte of the item is not printed).  CR, LF and TAB are shown as the
        two-character escapes ``\\r``, ``\\n`` and ``\\t``; ESC as ``^[``.
        """
        pieces = []
        for i in range(len(value) - 1):
            # get_wide_byte comes from ``from idaapi import *``; this file
            # does not bind the name ``idaapi`` itself.
            c = get_wide_byte(current_ea + i) & 0xff
            escaped = self._ESCAPES.get(c)
            pieces.append(chr(c) if escaped is None else escaped)
        return '"%s", 0' % ''.join(pieces)
######################################## | |
# Processor Plugin Entry | |
######################################## | |
def PROCESSOR_ENTRY():
    """Entry point of the processor plugin: return a ``CLEMENCY`` instance.

    The directory containing this script is put at the front of
    ``sys.path`` before the instance is created.
    """
    # add proc into module path
    here = os.path.dirname(os.path.abspath(__file__))
    sys.path.insert(0, here)
    return CLEMENCY()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment