Skip to content

Instantly share code, notes, and snippets.

@terrynini
Created December 7, 2021 04:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save terrynini/36d560ad61cbec449e731f0e00dcea7d to your computer and use it in GitHub Desktop.
Save terrynini/36d560ad61cbec449e731f0e00dcea7d to your computer and use it in GitHub Desktop.
from idaapi import *
import os
import sys
import re
from clemency_inst import inst_json
########################################
# Decoder Function
########################################
def is_bit_string(strg, search=re.compile(r'[^01]').search):
    """Return True when ``strg`` contains nothing but '0' and '1' characters.

    ``search`` is a pre-bound regex search for the first character that is
    neither '0' nor '1'; it is bound once as a default argument so the
    pattern is compiled a single time. An empty string counts as a bit
    string because there is no offending character to find.
    """
    if search(strg):
        return False
    return True
def SIGNEXT(x, b):
    """Sign-extend the low ``b`` bits of ``x`` into a Python integer.

    Bits above position ``b - 1`` are discarded; if bit ``b - 1`` is set the
    result is negative (two's complement interpretation of a b-bit field).
    """
    width_mask = (1 << b) - 1
    sign_bit = 1 << (b - 1)
    truncated = x & width_mask
    if truncated & sign_bit:
        # Top bit of the field is set: subtract 2**b to get the negative value.
        return truncated - (1 << b)
    return truncated
def fetch(code, n):
    """Reassemble an ``n``-bit instruction word from a 54-bit fetch window.

    ``code`` holds six 9-bit units, the first unit in the most significant
    position. The units are not stored in straight order: for each supported
    width the word is rebuilt by concatenating the units in the order listed
    below (most significant first).

    Returns the reassembled integer, or None when ``n`` is not one of
    18, 27, 36 or 54.
    """
    # Unit k (1-based) lives at bit offset 54 - 9*k of the window.
    u1, u2, u3, u4, u5, u6 = (
        (code >> shift) & 0x1ff for shift in range(45, -1, -9)
    )
    unit_order = {
        18: (u2, u1),
        27: (u2, u1, u3),
        36: (u2, u1, u3, u4),
        54: (u2, u1, u3, u5, u4, u6),
    }.get(n)
    if unit_order is None:
        return None
    word = 0
    for unit in unit_order:
        word = (word << 9) | unit
    return word
def calc_jump_addr(self, op, insn):
    """Compute the target address encoded by a branch/call operand.

    ``op.addr`` holds the raw field taken from the instruction:
      * BRA / CAA: the field is returned unchanged.
      * C / B: the field is treated as a 17-bit signed offset from ``insn.ea``.
      * CAR / BRR: the field is treated as a 27-bit signed offset from
        ``insn.ea``.
      * anything else: ``(field + insn.ea)`` wrapped to 27 bits.
    """
    raw = op.addr
    kind = insn.itype
    if kind == self.itype_BRA or kind == self.itype_CAA:
        return raw

    if kind == self.itype_C or kind == self.itype_B:
        sign_bit, field_mask = 0x10000, 0x1ffff
    elif kind == self.itype_CAR or kind == self.itype_BRR:
        sign_bit, field_mask = 0x4000000, 0x7ffffff
    else:
        return (raw + insn.ea) & 0x7ffffff

    if raw & sign_bit:
        # Negative offset: two's-complement magnitude subtracted from ea.
        return insn.ea - ((~raw & field_mask) + 1)
    return raw + insn.ea
########################################
# Processor Type
########################################
def ana_ops(self, ops, insn):
    """Fill the operands and auxpref flags of ``insn`` from decoded fields.

    ``ops`` is the list of extracted field values, one per non-constant
    entry of the instruction's ``args`` description (entries whose value
    starts with '0' or '1' are fixed opcode bits and carry no field).
    ``insn.itype`` must already be set; it selects the description in
    ``self.itable``.

    auxpref layout written here: bits 0-3 condition code (0xF when the
    instruction has no 'Condition' field), bit 4 the 'UF' field, bits 5 and
    up the 'Adjust rB' field.
    """
    inst = self.itable[insn.itype]
    opcnt = 0  # index of the next IDA operand slot to fill
    opidx = 0  # index into ops of the current non-constant field
    hascc = False
    for w, v in inst.args:
        if v[0] == '0' or v[0] == '1':
            # Fixed opcode bits: no entry in ops, so opidx is not advanced.
            continue
        if v == 'rA' or v == 'rB' or v == 'rC':
            insn[opcnt].type = o_reg
            insn[opcnt].dtype = dt_dword
            insn[opcnt].reg = ops[opidx]
            opcnt += 1
        elif v == 'Immediate':
            insn[opcnt].type = o_imm
            insn[opcnt].dtype = dt_dword
            insn[opcnt].value = ops[opidx]
            opcnt += 1
        elif v == 'Location' or v == 'Offset':
            # Raw field only; the final target is computed later from it.
            insn[opcnt].type = o_near
            insn[opcnt].dtype = dt_dword
            insn[opcnt].addr = ops[opidx]
            opcnt += 1
        elif v == 'Register Count':
            # Rewrites the PREVIOUS operand slot into a displacement operand:
            # phrase = the field just before this one, specval = this field,
            # value = the field two positions ahead, sign-extended from 27 bits.
            # NOTE(review): presumably ops[opidx + 2] is the 'Memory Offset'
            # field (which is why that field is a no-op below) -- confirm
            # against the instruction table.
            insn[opcnt - 1].type = o_displ
            insn[opcnt - 1].dtype = dt_dword
            insn[opcnt - 1].specval = ops[opidx]
            insn[opcnt - 1].phrase = ops[opidx - 1]
            offset = ops[opidx + 2]
            if offset & 0x4000000:
                offset -= 0x8000000
            insn[opcnt - 1].value = offset
        elif v == 'Adjust rB':
            insn.auxpref |= ops[opidx] << 5
        elif v == 'UF':
            insn.auxpref |= ops[opidx] << 4
        elif v == 'Condition':
            insn.auxpref |= ops[opidx]
            hascc = True
        elif v == 'Memory Flags':
            # Stored as a processor-specific operand; the value is kept in specval.
            insn[opcnt].type = o_idpspec0
            insn[opcnt].dtype = dt_dword
            insn[opcnt].specval = ops[opidx]
            opcnt += 1
        elif v == 'Memory Offset':
            # Already consumed by the 'Register Count' branch above.
            pass
        else:
            # Unknown field name in the instruction description.
            assert False
        opidx += 1
    if not hascc:
        # No condition field: mark the condition bits as 0xF.
        insn.auxpref |= 0xF
def ana(self, insn):
    """Decode one instruction at ``insn.ea + insn.size`` into ``insn``.

    Reads six 9-bit units into a 54-bit window, tries every encoding group
    in ``self.imatch`` until the fixed opcode bits match, then fills the
    operands via ``ana_ops``.

    Returns the instruction length in 9-bit units (also added to
    ``insn.size``), or 0 when no encoding matches.
    """
    current_ea = insn.ea + insn.size
    # Build the 54-bit window; the first unit read ends up most significant.
    code_bit = ''
    for i in range(6):
        code_bit += '{:09b}'.format(get_wide_byte(current_ea + i) & 0x1ff)
    code = int(code_bit, 2)
    idx = None
    for g in self.imatch:
        # Group key: (instruction width in bits, masks of the fixed bit fields).
        bitlen, masks = g
        # Group value: (masks of the operand fields, fixed-bits -> itype map).
        ops, imap = self.imatch[g]
        code2 = fetch(code, bitlen)
        bits = tuple(map(lambda x: (code2 & x[0]) >> x[1], masks))
        if bits in imap:
            idx = imap[bits]
            insn.itype = idx
            break
    if idx is None:
        return 0
    # code2, ops and bitlen still refer to the group that matched (loop broke).
    ana_ops(self, list(map(lambda x: (code2 & x[0]) >> x[1], ops)), insn)
    # Remove this block to disable simplifying ML+MH
    if insn.itype == self.itype_MH and insn.ea >= 3:
        code_bit = ''
        last_ea = insn.ea - 3
        # Re-read the 3 units just before this instruction, in the same unit
        # order fetch() uses for 27-bit words (second, first, third).
        code_bit = '{:09b}{:09b}{:09b}'.format(get_wide_byte(last_ea+1) & 0x1ff, get_wide_byte(last_ea) & 0x1ff, get_wide_byte(last_ea+2) & 0x1ff)
        code = int(code_bit, 2)
        # NOTE(review): 0x12 in the top 5 bits is presumably the ML opcode --
        # confirm against the instruction table.
        if (code >> 22) & 0x1f == 0x12:
            regidx = (code >> 17) & 0x1f
            lo = code & 0x1ffff
            if regidx == insn[0].reg:
                # Same destination register: fold both immediates into one
                # value and switch to the synthetic MEH instruction.
                hi = insn[1].value
                v = (hi << 10) | (lo & 0x3ff)
                insn[1].value = v
                insn.itype = self.itype_MEH
    # ms 0x1ffff -> ms -1
    if insn.itype == self.itype_MS and (insn[1].value & 0x10000):
        insn[1].value -= 0x20000
    bytelen = bitlen // 9
    insn.size += bytelen
    return bytelen
def add_stkpnt(self, pfn, v):
    """Record a stack-pointer change of ``v`` right after the current command.

    Does nothing when ``pfn`` is falsy or when the address following
    ``self.cmd`` already has a fixed stack-pointer delta. Otherwise registers
    the change and prints the address and delta.
    """
    if not pfn:
        return
    after_cmd = self.cmd.ea + self.cmd.size
    if is_fixed_spd(after_cmd):
        return
    add_auto_stkpnt2(pfn, after_cmd, v)
    print(hex(after_cmd), v)
def trace_sp(self):
    """Track stack-pointer adjustments made by the current command.

    Only ``SBI ST, ST, imm`` and ``ADI ST, ST, imm`` are recognised; the
    immediate is sign-extended from 7 bits and recorded as a negative (SBI)
    or positive (ADI) stack delta via ``add_stkpnt``.
    """
    cmd = self.cmd
    pfn = get_func(cmd.ea)
    if not pfn:
        return
    # Both the destination and the first source must be the ST register.
    if cmd.Op1.type != o_reg or cmd.Op1.reg != self.ireg_ST:
        return
    if cmd.Op2.type != o_reg or cmd.Op2.reg != self.ireg_ST:
        return

    if cmd.itype == self.itype_SBI:
        direction = -1
    elif cmd.itype == self.itype_ADI:
        direction = 1
    else:
        return
    add_stkpnt(self, pfn, direction * SIGNEXT(cmd.Op3.value, 7))
def emu(self, insn):
    """Emulate ``insn``: create its code/data cross-references.

    * B / BR / BRA / BRR: jump xref to the computed target (not for BR,
      whose target is in a register), plus fall-through flow as decided
      below.
    * C / CR / CAR / CAA: call xref to the computed target (not for CR),
      plus fall-through flow to the next instruction.
    * RE / HT: no fall-through.
    * everything else: fall-through flow.
    * MEH (merged ML+MH pair): data xref to the combined immediate; if the
      first three units there are in the range 0x20..0x7f, a 9-bit string
      item is created at that address.

    Cross-references are created through ``insn.add_cref`` /
    ``insn.add_dref`` (``to, opoff, type``) so that they originate from this
    instruction; the module-level ``add_cref(frm, to, type)`` takes a source
    *address* as its first argument and must not be given an operand offset.

    Always returns True.
    """
    aux = self.get_auxpref(insn)
    itype = insn.itype
    flow = False
    if itype in (self.itype_B, self.itype_BR, self.itype_BRA, self.itype_BRR):
        if itype != self.itype_BR:
            insn.add_cref(calc_jump_addr(self, insn.Op1, insn), 0, fl_JN)
        # NOTE(review): as written, BRA/BRR always fall through while B/BR
        # fall through unless the condition bits are 0xF. Confirm that
        # BRA/BRR are really meant to continue into the next instruction.
        if itype not in (self.itype_B, self.itype_BR) or (aux & 0xF) != 0xF:
            flow = True
    elif itype in (self.itype_C, self.itype_CR, self.itype_CAR, self.itype_CAA):
        if itype != self.itype_CR:
            insn.add_cref(calc_jump_addr(self, insn.Op1, insn), insn.Op1.offb, fl_CN)
        # Calls continue at the next instruction.
        flow = True
    elif itype in (self.itype_RE, self.itype_HT):
        pass
    else:
        flow = True
    if flow:
        insn.add_cref(insn.ea + insn.size, 0, fl_F)

    if itype == self.itype_MEH:
        target = insn[1].value
        insn.add_dref(target, 2, dr_R)
        first_units = [get_wide_byte(target + i) & 0x1ff for i in range(3)]
        if all(0x20 <= c <= 0x7f for c in first_units):
            # Equivalent of the old MakeCustomDataEx(target, 0, dtid, dfid):
            # custom items need the FF_CUSTOM flag and the data-type id and
            # data-format id packed into one tid (format id in the high
            # 16 bits). Size 0 leaves the size to the data type.
            create_data(
                target,
                FF_CUSTOM,
                0,
                self.nstr_dtid | ((self.nstr_dfid & 0xFFFF) << 16),
            )
    #if may_trace_sp():
    #    if flow:
    #        trace_sp(self)
    #    else:
    #        recalc_spd(self.cmd.ea)
    return True
def _outop_unresolved(ctx, op):
    """Print ``op`` as an error-coloured address and flag the missing name."""
    ctx.out_tagon(COLOR_ERROR)
    ctx.out_value(op, OOF_ADDR)
    ctx.out_tagoff(COLOR_ERROR)
    #QueueSet(Q_noName, ctx.insn.ea)
    remember_problem(Q_noName, ctx.insn.ea)


def _outop_relative_parts(self, insn, raw):
    """Split a relative branch field into (sign symbol, magnitude, target).

    C / B use a 17-bit signed field, CAR / BRR a 27-bit signed field; any
    other instruction adds the raw field to ``insn.ea`` and wraps to 27 bits.
    """
    if insn.itype == self.itype_C or insn.itype == self.itype_B:
        sign_bit, field_mask = 0x10000, 0x1ffff
    elif insn.itype == self.itype_CAR or insn.itype == self.itype_BRR:
        sign_bit, field_mask = 0x4000000, 0x7ffffff
    else:
        return '+', raw, (raw + insn.ea) & 0x7ffffff
    if raw & sign_bit != 0:
        # Negative offset: two's-complement magnitude of the field.
        magnitude = (~raw & field_mask) + 1
        return '-', magnitude, insn.ea - magnitude
    return '+', raw, raw + insn.ea


def outop(self, ctx, op):
    """Render one operand of ``ctx.insn`` into the output context.

    * o_reg: the register name.
    * o_idpspec0 (memory flags): 0 -> N, 1 -> R, 2 -> RW, 3 -> E,
      anything else -> ERR.
    * o_imm: signed 32-bit value.
    * o_near: for BRA / CAA the absolute location as a name expression;
      for every other instruction ``+off (name)`` / ``-off (name)`` where
      ``off`` is the hexadecimal offset magnitude. When no name expression
      can be produced the address is printed in the error colour and a
      "no name" problem is recorded.
    * o_displ: ``[reg+value, count]`` where count is ``specval + 1``.

    Always returns True.
    """
    optype = op.type
    if optype == o_reg:
        ctx.out_register(self.reg_names[op.reg])
    elif optype == o_idpspec0:
        if op.specval == 0:
            flag_symbols = 'N'
        elif op.specval == 1:
            flag_symbols = 'R'
        elif op.specval == 2:
            flag_symbols = 'RW'
        elif op.specval == 3:
            flag_symbols = 'E'
        else:
            flag_symbols = 'ERR'
        for symbol in flag_symbols:
            ctx.out_symbol(symbol)
    elif optype == o_imm:
        # take size from x.dtyp
        ctx.out_value(op, OOFW_32 | OOF_SIGNED)
    elif optype == o_near:
        insn = ctx.insn
        if insn.itype != self.itype_BRA and insn.itype != self.itype_CAA:
            # offset
            sign, off, addr = _outop_relative_parts(self, insn, op.addr)
            ctx.out_symbol(sign)
            ctx.out_btoa(off, 16)
            ctx.out_symbol(' ')
            ctx.out_symbol('(')
            if not ctx.out_name_expr(op, addr, BADADDR):
                _outop_unresolved(ctx, op)
            ctx.out_symbol(')')
        else:
            # location
            # The error path goes through ctx.out_tagoff like the offset
            # branch; the bare out_tagoff() previously called here is not a
            # method of the output context.
            if not ctx.out_name_expr(op, op.addr, BADADDR):
                _outop_unresolved(ctx, op)
    elif optype == o_displ:
        ctx.out_symbol('[')
        ctx.out_register(self.reg_names[op.phrase])
        ctx.out_value(op, OOFW_32 | OOFS_NEEDSIGN | OOF_SIGNED)
        ctx.out_symbol(',')
        ctx.out_symbol(' ')
        ctx.out_line("%d" % (op.specval + 1))
        ctx.out_symbol(']')
    return True
class CLEMENCY(processor_t):
    """IDA processor module for the cLEMENCy architecture (9-bit bytes).

    Decoding (``ana``), emulation (``emu``) and operand output (``outop``)
    live as module-level functions and are looked up on ``module`` at call
    time; when the ``IDA_RELOAD`` environment variable is set the module is
    reloaded before every call so edits take effect without restarting.
    """

    # IDP id ( Numbers above 0x8000 are reserved for the third-party modules)
    id = 0x8000 + 0x999
    # Processor features
    flag = PR_ADJSEGS | PRN_HEX | PR_WORD_INS
    # Number of bits in a byte for code segments (usually 8)
    # IDA supports values up to 32 bits
    cnbits = 9
    # Number of bits in a byte for non-code segments (usually 8)
    # IDA supports values up to 32 bits
    dnbits = 9
    # short processor names (NULL terminated)
    # Each name should be shorter than 9 characters
    psnames = ["clemency"]
    # long processor names (NULL terminated)
    # No restriction on name lengths.
    plnames = ["cLEMENCy"]
    segreg_size = 0
    ##########################
    # instruction
    # icode of the first instruction
    instruc_start = 0
    assembler = {
        'flag': ASH_HEXF3 | ASD_DECF0 | ASO_OCTF1 | ASB_BINF3 | AS_ASCIIC,  # | AS_ASCIIZ
        "uflag": 0,
        "name": "GNU assembler",
        "origin": ".org",
        "end": "end",
        "cmnt": ";",
        "ascsep": '"',
        "accsep": "'",
        "esccodes": "\"'",
        "a_ascii": ".ascii",
        "a_byte": ".byte",
        "a_word": ".word",
        "a_3byte": ".tribyte",
        "a_bss": "dfs %s",
        "a_seg": "seg",
        "a_curip": ".",
        "a_public": "",
        "a_weak": "",
        "a_extrn": ".extrn",
        "a_comdef": "",
        "a_align": ".align",
        "lbrace": "(",
        "rbrace": ")",
        "a_mod": "%",
        "a_band": "&",
        "a_bor": "|",
        "a_xor": "^",
        "a_bnot": "~",
        "a_shl": "<<",
        "a_shr": ">>",
        "a_sizeof_fmt": "size %s",
    }
    # flag for auxpref
    FL_UF = 0x0010
    FL_CC = 0x000F
    FL_ADJUST = 0x0060
    # The module that holds ana/emu/outop (this file, imported by name).
    module = __import__('clemency')

    # Mnemonic suffix for each 4-bit condition code; index 0xF is never
    # looked up (it means "no condition suffix").
    cc_table = [
        'n',
        'e',
        'l',
        'le',
        'g',
        'ge',
        'no',
        'o',
        'ns',
        's',
        'sl',
        'sle',
        'sg',
        'sge',
        '',
        '',
    ]

    def __init__(self):
        processor_t.__init__(self)
        # new data format
        self.init_data_format()
        # reload debug flag
        self.doReload = os.getenv('IDA_RELOAD')
        # init
        self._init_registers()
        self._init_instructions()

    def init_data_format(self):
        """Register the custom tribyte and 9-bit string data types/formats."""
        self.tribyte_dtid = register_custom_data_type(tribyte_data_type())
        self.tribyte_dfid = register_custom_data_format(tribyte_data_format())
        self.nstr_dtid = register_custom_data_type(nbit_str_data_type())
        self.nstr_dfid = register_custom_data_format(nbit_str_data_format())

    def _init_registers(self):
        """Define register names and the matching ``ireg_<NAME>`` indices."""
        # Registers definition
        self.reg_names = ["R%02d" % (i) for i in range(29)] + ["ST", "RA", "PC", "FL"] + ["CS", "DS"]
        # Create the ireg_XXXX constants
        for index, reg_name in enumerate(self.reg_names):
            setattr(self, 'ireg_' + reg_name, index)
        # Set fake segment registers
        self.reg_first_sreg = self.reg_code_sreg = self.ireg_CS
        self.reg_last_sreg = self.reg_data_sreg = self.ireg_DS

    def _init_instructions(self):
        """Build the instruction table and the decoder match tables.

        ``self.itable[j]`` describes instruction ``j`` of ``inst_json``.
        ``self.imatch`` maps ``(width_in_bits, fixed_field_masks)`` to
        ``(operand_field_masks, {fixed_field_values: j})``; each mask entry
        is a ``(bit mask, right shift)`` pair. Also defines ``itype_<NAME>``
        for every instruction plus the synthetic ``itype_MEH``.
        """
        class idef:
            def __init__(self, name, cmt, fmt, cf, args):
                self.name = name
                self.cmt = cmt
                self.fmt = fmt
                self.cf = cf
                self.args = args

        self.itable = {}
        self.imatch = {}
        for j in range(len(inst_json)):
            entry = inst_json[j]
            args = [(a['width'], a['value']) for a in entry['args']]
            # Set itable entry for instruction #j
            self.itable[j] = idef(entry['name'], entry['desc'], entry['format'], entry['feature'], args)
            # Generate matching table entry: walk the fields from the most
            # significant bit down, splitting them into fixed opcode bits
            # (value is a string of 0/1) and operand fields.
            ws = sum(w for w, _ in args)
            fixed_fields = []
            fixed_values = []
            operand_fields = []
            off = 0
            for w, v in args:
                shift = ws - off - w
                field = ((1 << (ws - off)) - (1 << shift), shift)
                if v[0] in '01':
                    fixed_fields.append(field)
                    fixed_values.append(int(v, 2))
                else:
                    operand_fields.append(field)
                off += w
            grp = (ws, tuple(fixed_fields))
            if grp not in self.imatch:
                # (operand mask, inst match table)
                self.imatch[grp] = (tuple(operand_fields), {})
            self.imatch[grp][1][tuple(fixed_values)] = j

        Instructions = []
        for j in range(len(self.itable)):
            x = self.itable[j]
            d = dict(name=x.name.lower(), feature=x.cf)
            if x.cmt:
                d['cmt'] = x.cmt
            Instructions.append(d)
            setattr(self, 'itype_' + x.name, j)
        # Synthetic instruction used when an ML+MH pair is merged by ana().
        setattr(self, 'itype_MEH', len(Instructions))
        Instructions.append(dict(name='meh', feature=0))
        self.instruc_end = len(Instructions) + 1
        self.instruc = Instructions
        self.icode_return = self.itype_RE

    def _dynamic_handler(self, name):
        """Return function ``name`` from ``self.module``, reloading if asked.

        ``reload`` is a builtin only on Python 2; on Python 3 it lives in
        ``importlib``, so resolve it in a way that works on both.
        """
        if self.doReload:
            try:
                from importlib import reload as reload_module
            except ImportError:
                reload_module = reload  # noqa: F821 - Python 2 builtin
            reload_module(self.module)
        return getattr(self.module, name)

    #def ana(self):
    def ev_ana_insn(self, insn):
        """Decode callback: delegates to the module-level ``ana``."""
        return self._dynamic_handler('ana')(self, insn)

    #def emu(self):
    def ev_emu_insn(self, insn):
        """Emulation callback: delegates to the module-level ``emu``."""
        return self._dynamic_handler('emu')(self, insn)

    def ev_out_insn(self, ctx):
        """Output the mnemonic (with its suffixes) and up to six operands."""
        auxpref = ctx.insn.auxpref
        postfix = ''
        # Adjust Register
        # e.g., LDSI, LDSD
        adjust_flag = (auxpref & self.FL_ADJUST) >> 5
        if adjust_flag == 1:
            postfix += 'i'
        elif adjust_flag == 2:
            postfix += 'd'
        # Conditional
        # e.g., Bge
        cc_idx = auxpref & self.FL_CC
        if cc_idx != 0xf:
            postfix += self.cc_table[cc_idx]
        # Update Flag
        # e.g., ad.
        if auxpref & self.FL_UF != 0:
            postfix += '.'
        #OutMnem(12, postfix)
        ctx.out_mnem(12, postfix)
        for i in range(6):
            op = ctx.insn[i]
            if op.type == o_void:
                break
            if i > 0:
                ctx.out_symbol(',')
                ctx.out_char(' ')
            ctx.out_one_operand(i)
        # NOTE(review): cvar.gl_comm is the pre-7.0 way of enabling comment
        # output; newer IDAPython exposes ctx.set_gen_cmt() for this --
        # confirm which one the targeted IDA version needs.
        cvar.gl_comm = 1
        ctx.flush_outbuf()
        return True

    def ev_out_operand(self, ctx, op):
        """Operand output callback: delegates to the module-level ``outop``."""
        return self._dynamic_handler('outop')(self, ctx, op)
########################################
# Data format for TriBytes (9bits)
########################################
class tribyte_data_type(data_type_t):
    """Custom data type "py_tribyte": an item that always spans 3 units."""

    ASM_KEYWORD = ".tri"

    def __init__(self):
        # Positional arguments, in order: type name, value size, menu name,
        # hotkey, assembler keyword.
        registration = (
            "py_tribyte",
            1,
            "TriBytes (9bits)",
            'z',
            tribyte_data_type.ASM_KEYWORD,
        )
        data_type_t.__init__(self, *registration)

    def calc_item_size(self, ea, maxsize):
        """Item size is fixed at 3, whatever ``ea`` and ``maxsize`` are."""
        return 3
class tribyte_data_format(data_format_t):
    """Display format for tribyte items: one 27-bit value shown in hex."""

    def __init__(self):
        data_format_t.__init__(self,
                               "py_tribyte_format",
                               0,
                               "TriBytes (9bits)")

    def printf(self, value, current_ea, operand_num, dtid):
        """Return the hex text of the 27-bit value stored at ``current_ea``.

        The three 9-bit units are combined as second, first, third (most
        significant first). ``value``, ``operand_num`` and ``dtid`` are not
        used; the units are read straight from the database.
        """
        # get_wide_byte comes from the file's ``from idaapi import *``; the
        # bare ``idaapi`` module name is never imported in this file.
        b1 = get_wide_byte(current_ea) & 0x1ff
        b2 = get_wide_byte(current_ea + 1) & 0x1ff
        b3 = get_wide_byte(current_ea + 2) & 0x1ff
        return hex((b2 << 18) + (b1 << 9) + b3)
class nbit_str_data_type(data_type_t):
    """Custom data type "py_str": a terminated string of 9-bit units."""

    ASM_KEYWORD = ".str"

    def __init__(self):
        data_type_t.__init__(self,
                             "py_str",
                             1,
                             "String (9bits)",
                             ',',
                             nbit_str_data_type.ASM_KEYWORD)

    def calc_item_size(self, ea, maxsize):
        """Return the string length at ``ea`` including its terminating unit.

        Scanning continues while units are in 0x20..0x7f or are one of CR,
        LF, TAB or ESC; the first other unit ends the string and is counted
        as the terminator. ``maxsize`` is not consulted.
        """
        allowed_controls = (0x0d, 0x0a, 0x09, 0x1b)
        length = 0
        while True:
            # get_wide_byte comes from the file's ``from idaapi import *``;
            # the bare ``idaapi`` module name is never imported in this file.
            c = get_wide_byte(ea + length) & 0x1ff
            if not 0x20 <= c <= 0x7f and c not in allowed_controls:
                break
            length += 1
        return length + 1
class nbit_str_data_format(data_format_t):
    """Display format for 9-bit strings: a quoted literal followed by ", 0"."""

    def __init__(self):
        data_format_t.__init__(self,
                               "py_str_format",
                               0,
                               "String (9bits) format")

    def printf(self, value, current_ea, operand_num, dtid):
        """Return the listing text for the string item at ``current_ea``.

        ``len(value) - 1`` units are rendered (the last unit is the
        terminator). CR, LF and TAB are shown as the escaped text ``\\r``,
        ``\\n`` and ``\\t``; ESC is shown as ``^[``; everything else is shown
        as the character for the low 8 bits of the unit.
        """
        escapes = {
            0x0d: '\\r',
            0x0a: '\\n',
            # Escaped like CR/LF above; a raw tab would break the one-line
            # listing text.
            0x09: '\\t',
            0x1b: '^[',
        }
        pieces = []
        for i in range(len(value) - 1):
            # get_wide_byte comes from the file's ``from idaapi import *``;
            # the bare ``idaapi`` module name is never imported in this file.
            c = get_wide_byte(current_ea + i) & 0xff
            pieces.append(escapes[c] if c in escapes else chr(c))
        return '"%s", 0' % ''.join(pieces)
########################################
# Processor Plugin Entry
########################################
def PROCESSOR_ENTRY():
    """Entry point called by IDA: return the processor module instance.

    The directory containing this script is put at the front of ``sys.path``
    first, so modules that sit next to it can be imported by name.
    """
    # add proc into module path
    module_dir = os.path.dirname(os.path.abspath(__file__))
    sys.path.insert(0, module_dir)
    return CLEMENCY()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment