# terrynini/clemency.py

## clemency.py
from idaapi import *
import os
import sys
import re
from clemency_inst import inst_json

########################################
# Decoder Function
########################################
def is_bit_string(strg, search=re.compile(r'[^01]').search):
    """Tell whether ``strg`` consists solely of the characters '0' and '1'.

    ``search`` is the bound ``search`` method of a pattern matching any
    character other than '0' or '1'.  It is a default argument, so the
    pattern is compiled once, when the function is defined.  An empty string
    contains no offending character and therefore yields ``True``.
    """
    offending = search(strg)
    if offending:
        return False
    return True

def SIGNEXT(x, b):
    """Sign-extend the low ``b`` bits of ``x`` into a Python integer.

    Bits of ``x`` above bit ``b - 1`` are discarded; bit ``b - 1`` is treated
    as the sign bit of a two's-complement value.
    """
    sign_bit = 1 << (b - 1)
    value = x & ((1 << b) - 1)
    if value & sign_bit:
        # Sign bit set: the value lies in the negative half of the range.
        return value - (1 << b)
    return value

def fetch(code, n):
    """Reassemble the first ``n`` bits of an instruction from ``code``.

    ``code`` is a 54-bit window holding six 9-bit bytes, the first byte in
    the most significant position.  The bytes are stored in a swapped order,
    so they are put back in the order listed below for each supported
    length (18, 27, 36 or 54 bits).  Any other ``n`` yields ``None``.
    """
    # nine_bit[k] is the k-th byte of the window (1-based, index 0 unused).
    nine_bit = [None] + [(code >> (54 - 9 * k)) & 0x1ff for k in range(1, 7)]
    # Byte positions from most to least significant, per instruction length.
    byte_order = {
        18: (2, 1),
        27: (2, 1, 3),
        36: (2, 1, 3, 4),
        54: (2, 1, 3, 5, 4, 6),
    }
    order = byte_order.get(n)
    if order is None:
        return None
    word = 0
    for position in order:
        word = (word << 9) | nine_bit[position]
    return word

def calc_jump_addr(self, op, insn):
    """Compute the branch/call target encoded by operand ``op`` of ``insn``.

    * ``BRA`` / ``CAA``: ``op.addr`` is returned unchanged.
    * ``C`` / ``B``: ``op.addr`` is a 17-bit two's-complement offset from
      ``insn.ea``.
    * ``CAR`` / ``BRR``: ``op.addr`` is a 27-bit two's-complement offset from
      ``insn.ea``.
    * anything else: ``(op.addr + insn.ea)`` truncated to 27 bits.
    """
    raw = op.addr
    kind = insn.itype
    if kind == self.itype_BRA or kind == self.itype_CAA:
        return raw

    if kind == self.itype_C or kind == self.itype_B:
        sign_bit, field_mask = 0x10000, 0x1ffff
    elif kind == self.itype_CAR or kind == self.itype_BRR:
        sign_bit, field_mask = 0x4000000, 0x7ffffff
    else:
        return (raw + insn.ea) & 0x7ffffff

    if raw & sign_bit:
        # Negative offset: subtract its magnitude (two's complement).
        return insn.ea - ((~raw & field_mask) + 1)
    return raw + insn.ea

########################################
# Processor Type
########################################

def ana_ops(self, ops, insn):
    """Fill in the operands and ``auxpref`` flags of ``insn``.

    ``self.itable[insn.itype].args`` is a list of ``(width, value)`` pairs.
    Entries whose value starts with '0' or '1' are constant opcode bits and
    are skipped.  Every other entry consumes the next element of ``ops``
    (the extracted field values, in the same order) and is handled by kind:

    * ``rA``/``rB``/``rC``  -> register operand
    * ``Immediate``         -> immediate operand
    * ``Location``/``Offset`` -> near (code) operand
    * ``Register Count``    -> turns the previous operand into a
      displacement operand, using the preceding field as base register and
      the field two positions later as a signed 27-bit displacement
    * ``Adjust rB``, ``UF``, ``Condition`` -> bits 5-6, bit 4 and bits 0-3
      of ``insn.auxpref``
    * ``Memory Flags``      -> processor-specific operand (``o_idpspec0``)
    * ``Memory Offset``     -> nothing (already consumed by Register Count)

    When the instruction has no ``Condition`` field, the condition bits of
    ``auxpref`` are set to 0xF.
    """
    spec = self.itable[insn.itype]
    slot = 0            # next operand of insn to fill
    field = 0           # index into ops for the current argument
    has_condition = False
    for _width, kind in spec.args:
        if kind[0] in ('0', '1'):
            continue
        if kind in ('rA', 'rB', 'rC'):
            operand = insn[slot]
            operand.type = o_reg
            operand.dtype = dt_dword
            operand.reg = ops[field]
            slot += 1
        elif kind == 'Immediate':
            operand = insn[slot]
            operand.type = o_imm
            operand.dtype = dt_dword
            operand.value = ops[field]
            slot += 1
        elif kind in ('Location', 'Offset'):
            operand = insn[slot]
            operand.type = o_near
            operand.dtype = dt_dword
            operand.addr = ops[field]
            slot += 1
        elif kind == 'Register Count':
            # Rewrites the operand emitted just before this field.
            memory = insn[slot - 1]
            memory.type = o_displ
            memory.dtype = dt_dword
            memory.specval = ops[field]
            memory.phrase = ops[field - 1]
            displacement = ops[field + 2]
            if displacement & 0x4000000:
                # bit 26 set: negative 27-bit displacement
                displacement -= 0x8000000
            memory.value = displacement
        elif kind == 'Adjust rB':
            insn.auxpref |= ops[field] << 5
        elif kind == 'UF':
            insn.auxpref |= ops[field] << 4
        elif kind == 'Condition':
            insn.auxpref |= ops[field]
            has_condition = True
        elif kind == 'Memory Flags':
            operand = insn[slot]
            operand.type = o_idpspec0
            operand.dtype = dt_dword
            operand.specval = ops[field]
            slot += 1
        elif kind == 'Memory Offset':
            pass
        else:
            assert False
        field += 1
    if not has_condition:
        insn.auxpref |= 0xF

def ana(self, insn):
    """Decode the instruction at ``insn.ea + insn.size`` into ``insn``.

    Six 9-bit bytes are read and packed into one 54-bit window.  Each
    encoding group in ``self.imatch`` is tried in iteration order: the
    window is reordered by ``fetch`` for the group's bit length, the group's
    constant-bit masks are applied, and the first group whose constant bits
    form a known key selects the instruction type.  The group's operand
    masks then provide the field values handed to ``ana_ops``.

    Two fix-ups follow:

    * an ``MH`` whose preceding 3-byte word has 0x12 in bits 22-26 and the
      same register in bits 17-21 becomes the pseudo-instruction ``MEH``
      carrying ``(high << 10) | low 10 bits`` as its immediate;
    * an ``MS`` immediate with bit 16 set is made negative (17-bit sign
      extension), e.g. ``ms 0x1ffff`` becomes ``ms -1``.

    Returns the instruction length in bytes (also added to ``insn.size``),
    or 0 when no encoding matches.
    """
    start = insn.ea + insn.size
    window = 0
    for k in range(6):
        window = (window << 9) | (get_wide_byte(start + k) & 0x1ff)

    matched = None
    for group in self.imatch:
        bitlen, masks = group
        ops, imap = self.imatch[group]
        code2 = fetch(window, bitlen)
        key = tuple((code2 & mask) >> shift for mask, shift in masks)
        if key in imap:
            matched = imap[key]
            insn.itype = matched
            break
    if matched is None:
        return 0
    ana_ops(self, [(code2 & mask) >> shift for mask, shift in ops], insn)

    # Remove this block to disable simplifying ML+MH
    if insn.itype == self.itype_MH and insn.ea >= 3:
        prev_ea = insn.ea - 3
        # Same byte swap as fetch(): second byte, first byte, third byte.
        prev = (((get_wide_byte(prev_ea + 1) & 0x1ff) << 18)
                | ((get_wide_byte(prev_ea) & 0x1ff) << 9)
                | (get_wide_byte(prev_ea + 2) & 0x1ff))
        if (prev >> 22) & 0x1f == 0x12:
            prev_reg = (prev >> 17) & 0x1f
            low_part = prev & 0x1ffff
            if prev_reg == insn[0].reg:
                insn[1].value = (insn[1].value << 10) | (low_part & 0x3ff)
                insn.itype = self.itype_MEH

    # ms 0x1ffff -> ms -1
    if insn.itype == self.itype_MS and (insn[1].value & 0x10000):
        insn[1].value -= 0x20000

    length = bitlen // 9
    insn.size += length
    return length

def add_stkpnt(self, pfn, v):
    """Register stack-pointer change ``v`` at the end of ``self.cmd``.

    Nothing happens when ``pfn`` is falsy, or when ``is_fixed_spd`` reports
    true for the address right after the current instruction.  Otherwise
    the change is added with ``add_auto_stkpnt2`` and echoed with ``print``.
    """
    if not pfn:
        return
    after = self.cmd.ea + self.cmd.size
    if is_fixed_spd(after):
        return
    add_auto_stkpnt2(pfn, after, v)
    print(hex(after), v)

def trace_sp(self):
    """Record stack adjustments made by ``SBI``/``ADI`` on the ST register.

    Works on ``self.cmd``.  When the instruction lies inside a function and
    both its first and second operands are the ST register, an ``SBI``
    records minus the sign-extended 7-bit third operand and an ``ADI``
    records that value unchanged, both through ``add_stkpnt``.
    """
    cmd = self.cmd
    pfn = get_func(cmd.ea)
    if not pfn:
        return
    first_is_st = cmd.Op1.type == o_reg and cmd.Op1.reg == self.ireg_ST
    if not first_is_st:
        return
    second_is_st = cmd.Op2.type == o_reg and cmd.Op2.reg == self.ireg_ST
    if not second_is_st:
        return
    if cmd.itype == self.itype_SBI:
        add_stkpnt(self, pfn, -SIGNEXT(cmd.Op3.value, 7))
    elif cmd.itype == self.itype_ADI:
        add_stkpnt(self, pfn, SIGNEXT(cmd.Op3.value, 7))

def emu(self, insn):
    """Emulate ``insn``: record its code and data cross-references.

    * ``B``/``BRA``/``BRR`` get a jump xref to ``calc_jump_addr``'s result;
      ``BR`` (register target) gets none.
    * ``C``/``CAR``/``CAA`` get a call xref; every call, including ``CR``,
      also flows into the next instruction.
    * ``RE`` and ``HT`` end the flow.
    * Every other instruction flows into the next one.
    * ``MEH`` additionally gets a data xref to its immediate and, when the
      first three 9-bit bytes there are in 0x20..0x7f, a string item is
      created at that address.

    All xrefs originate at ``insn.ea``: ``add_cref``/``add_dref`` take
    ``(from, to, type)``.  Always returns ``True``.
    """
    aux = self.get_auxpref(insn)
    next_ea = insn.ea + insn.size

    flow = False
    if insn.itype in [self.itype_B, self.itype_BR, self.itype_BRA, self.itype_BRR]:
        if insn.itype != self.itype_BR:
            add_cref(insn.ea, calc_jump_addr(self, insn.Op1, insn), fl_JN)
        # NOTE(review): as written BRA/BRR always keep falling through,
        # while B/BR fall through unless the condition field is 0xF.
        # Confirm that this is intended for BRA/BRR.
        if insn.itype not in [self.itype_B, self.itype_BR] or (aux & 0xF) != 0xF:
            flow = True
    elif insn.itype in [self.itype_C, self.itype_CR, self.itype_CAR, self.itype_CAA]:
        if insn.itype != self.itype_CR:
            add_cref(insn.ea, calc_jump_addr(self, insn.Op1, insn), fl_CN)
        add_cref(insn.ea, next_ea, fl_F)
    elif insn.itype in [self.itype_RE, self.itype_HT]:
        pass
    else:
        flow = True

    if flow:
        add_cref(insn.ea, next_ea, fl_F)

    if insn.itype == self.itype_MEH:
        target = insn[1].value
        add_dref(insn.ea, target, dr_R)
        first_chars = [get_wide_byte(target + k) & 0x1ff for k in range(3)]
        if all(0x20 <= c <= 0x7f for c in first_chars):
            # NOTE(review): this was ported from
            # MakeCustomDataEx(ea, 0, dtid, dfid); confirm that create_data
            # accepts the custom type/format ids in these positions.
            create_data(target, 0, self.nstr_dtid, self.nstr_dfid)

    # Stack-pointer tracing (trace_sp / recalc_spd) is currently disabled.
    #if may_trace_sp():
    #    if flow:
    #        trace_sp(self)
    #    else:
    #        recalc_spd(self.cmd.ea)

    return True

def outop(self, ctx, op):
    """Print operand ``op`` of ``ctx.insn`` through output context ``ctx``.

    * register operand: its name from ``self.reg_names``;
    * ``o_idpspec0`` (memory flags): ``N``, ``R``, ``RW`` or ``E`` for
      values 0-3, ``ERR`` for anything else;
    * immediate: signed 32-bit value;
    * near operand: for every instruction except ``BRA``/``CAA`` the signed
      offset (``+hex`` / ``-hex``) followed by the target in parentheses;
      for ``BRA``/``CAA`` just the target.  The target is printed as a name
      expression, or as an error-coloured address (and logged with
      ``remember_problem``) when no name expression can be produced;
    * displacement operand: ``[reg+disp, count]`` where count is
      ``specval + 1``.

    Always returns ``True``.
    """
    optype = op.type
    if optype == o_reg:
        ctx.out_register(self.reg_names[op.reg])
    elif optype == o_idpspec0:
        flag_text = {0: 'N', 1: 'R', 2: 'RW', 3: 'E'}.get(op.specval, 'ERR')
        for ch in flag_text:
            ctx.out_symbol(ch)
    elif optype == o_imm:
        # take size from x.dtyp
        ctx.out_value(op, OOFW_32 | OOF_SIGNED)
    elif optype == o_near:
        insn = ctx.insn
        raw = op.addr
        # Same target computation that emu() uses for its xrefs.
        target = calc_jump_addr(self, op, insn)
        relative = insn.itype != self.itype_BRA and insn.itype != self.itype_CAA
        if relative:
            if insn.itype == self.itype_C or insn.itype == self.itype_B:
                sign_bit, field_mask = 0x10000, 0x1ffff
            elif insn.itype == self.itype_CAR or insn.itype == self.itype_BRR:
                sign_bit, field_mask = 0x4000000, 0x7ffffff
            else:
                # No signed form: the offset is always shown as positive.
                sign_bit, field_mask = 0, 0
            if raw & sign_bit:
                ctx.out_symbol('-')
                ctx.out_btoa((~raw & field_mask) + 1, 16)
            else:
                ctx.out_symbol('+')
                ctx.out_btoa(raw, 16)
            ctx.out_symbol(' ')
            ctx.out_symbol('(')

        if not ctx.out_name_expr(op, target, BADADDR):
            ctx.out_tagon(COLOR_ERROR)
            ctx.out_value(op, OOF_ADDR)
            # Fixed: the absolute-location path called a bare out_tagoff().
            ctx.out_tagoff(COLOR_ERROR)
            remember_problem(Q_noName, insn.ea)

        if relative:
            ctx.out_symbol(')')
    elif optype == o_displ:
        ctx.out_symbol('[')
        ctx.out_register(self.reg_names[op.phrase])
        ctx.out_value(op, OOFW_32 | OOFS_NEEDSIGN | OOF_SIGNED)
        ctx.out_symbol(',')
        ctx.out_symbol(' ')
        ctx.out_line("%d" % (op.specval + 1))
        ctx.out_symbol(']')

    return True

class CLEMENCY(processor_t):
    """IDA processor module class for cLEMENCy (9-bit bytes).

    The decoding, emulation and operand-printing logic lives in the
    module-level functions ``ana``, ``emu`` and ``outop``.  The ``ev_*``
    hooks look them up on ``self.module`` at call time, so that setting the
    ``IDA_RELOAD`` environment variable makes every hook re-import the
    module first (handy while developing).
    """

    # IDP id ( Numbers above 0x8000 are reserved for the third-party modules)
    id = 0x8000 + 0x999
    # Processor features
    flag = PR_ADJSEGS | PRN_HEX | PR_WORD_INS
    # Number of bits in a byte for code segments (usually 8)
    # IDA supports values up to 32 bits
    cnbits = 9
    # Number of bits in a byte for non-code segments (usually 8)
    # IDA supports values up to 32 bits
    dnbits = 9
    # short processor names
    # Each name should be shorter than 9 characters
    psnames = ["clemency"]
    # long processor names
    # No restriction on name lengths.
    plnames = ["cLEMENCy"]

    segreg_size = 0

    # icode of the first instruction
    instruc_start = 0

    assembler = {
        'flag': ASH_HEXF3 | ASD_DECF0 | ASO_OCTF1 | ASB_BINF3 | AS_ASCIIC,  # | AS_ASCIIZ
        "uflag": 0,
        "name": "GNU assembler",

        "origin": ".org",
        "end": "end",
        "cmnt": ";",

        "ascsep": '"',
        "accsep": "'",
        "esccodes": "\"'",

        "a_ascii": ".ascii",
        "a_byte": ".byte",
        "a_word": ".word",
        "a_3byte": ".tribyte",

        "a_bss": "dfs %s",

        "a_seg": "seg",
        "a_curip": ".",
        "a_public": "",
        "a_weak": "",
        "a_extrn": ".extrn",
        "a_comdef": "",
        "a_align": ".align",

        "lbrace": "(",
        "rbrace": ")",
        "a_mod": "%",
        "a_band": "&",
        "a_bor": "|",
        "a_xor": "^",
        "a_bnot": "~",
        "a_shl": "<<",
        "a_shr": ">>",
        "a_sizeof_fmt": "size %s",
    }

    # Layout of insn.auxpref (filled in by ana_ops)
    FL_UF = 0x0010      # update-flags bit, printed as a trailing '.'
    FL_CC = 0x000F      # condition code, index into cc_table (0xF = none)
    FL_ADJUST = 0x0060  # register adjust mode, printed as 'i' / 'd'

    # Mnemonic suffix for each condition code.
    cc_table = [
        'n',
        'e',
        'l',
        'le',
        'g',
        'ge',
        'no',
        'o',
        'ns',
        's',
        'sl',
        'sle',
        'sg',
        'sge',
        '',
        '',
    ]

    # The module holding ana/emu/outop; re-imported on demand.
    module = __import__('clemency')

    def __init__(self):
        processor_t.__init__(self)
        # new data format
        self.init_data_format()
        # reload debug flag
        self.doReload = os.getenv('IDA_RELOAD')
        # init
        self._init_registers()
        self._init_instructions()

    def init_data_format(self):
        """Register the custom tribyte and 9-bit string data types/formats."""
        self.tribyte_dtid = register_custom_data_type(tribyte_data_type())
        self.tribyte_dfid = register_custom_data_format(tribyte_data_format())
        self.nstr_dtid = register_custom_data_type(nbit_str_data_type())
        self.nstr_dfid = register_custom_data_format(nbit_str_data_format())

    def _init_registers(self):
        """Define the register names and the ``ireg_<NAME>`` index constants."""
        general = ["R%02d" % n for n in range(29)]
        self.reg_names = general + ["ST", "RA", "PC", "FL"] + ["CS", "DS"]

        # Create the ireg_XXXX constants
        for index, reg_name in enumerate(self.reg_names):
            setattr(self, 'ireg_' + reg_name, index)

        # Set fake segment registers
        self.reg_first_sreg = self.reg_code_sreg = self.ireg_CS
        self.reg_last_sreg = self.reg_data_sreg = self.ireg_DS

    def _init_instructions(self):
        """Build the instruction tables from ``inst_json``.

        * ``self.itable[j]``: description of instruction ``j``.
        * ``self.imatch``: maps ``(total bits, constant-field masks)`` to
          ``(operand-field masks, {constant values: j})``.  Each mask is a
          ``(bit mask, shift)`` pair.  Instructions sharing a layout share
          one entry; ``ana`` walks this dict in insertion order.
        * ``self.instruc`` / ``itype_<NAME>``: the list handed to IDA and
          the matching index constants, plus the pseudo-instruction ``meh``.
        """
        class idef:
            def __init__(self, name, cmt, fmt, cf, args):
                self.name = name
                self.cmt = cmt
                self.fmt = fmt
                self.cf = cf
                self.args = args

        self.itable = {}
        self.imatch = {}

        for j, entry in enumerate(inst_json):
            args = [(a['width'], a['value']) for a in entry['args']]

            # Set itable entry for instruction #j
            self.itable[j] = idef(entry['name'], entry['desc'], entry['format'],
                                  entry['feature'], args)

            # Generate matching table entry
            total_bits = sum(w for w, _ in args)
            const_masks = []
            const_vals = []
            operand_masks = []
            consumed = 0
            for w, v in args:
                shift = total_bits - consumed - w
                field = (((1 << w) - 1) << shift, shift)
                if v[0] in '01':
                    const_masks.append(field)
                    const_vals.append(int(v, 2))
                else:
                    operand_masks.append(field)
                consumed += w

            grp = (total_bits, tuple(const_masks))
            if grp not in self.imatch:
                # (operand mask, inst match table)
                self.imatch[grp] = (tuple(operand_masks), {})
            self.imatch[grp][1][tuple(const_vals)] = j

        Instructions = []
        for j in range(len(self.itable)):
            x = self.itable[j]
            d = dict(name=x.name.lower(), feature=x.cf)
            if x.cmt:
                d['cmt'] = x.cmt
            Instructions.append(d)
            setattr(self, 'itype_' + x.name, j)

        # Pseudo-instruction produced by ana() when it merges ML+MH.
        setattr(self, 'itype_MEH', len(Instructions))
        Instructions.append(dict(name='meh', feature=0))

        self.instruc_end = len(Instructions) + 1
        self.instruc = Instructions
        self.icode_return = self.itype_RE

    def _reload_if_requested(self):
        """Re-import ``self.module`` when the IDA_RELOAD flag is set.

        ``reload`` is a builtin only on Python 2; on Python 3 it lives in
        ``importlib``.  Both are supported.
        """
        if not self.doReload:
            return
        try:
            from importlib import reload as do_reload
        except ImportError:
            do_reload = reload  # Python 2 builtin
        do_reload(self.module)

    def ev_ana_insn(self, insn):
        """Decode one instruction; delegates to the module-level ``ana``."""
        self._reload_if_requested()
        return getattr(self.module, 'ana')(self, insn)

    def ev_emu_insn(self, insn):
        """Emulate one instruction; delegates to the module-level ``emu``."""
        self._reload_if_requested()
        return getattr(self.module, 'emu')(self, insn)

    def ev_out_insn(self, ctx):
        """Print the mnemonic (with suffixes) and all operands of ``ctx.insn``."""
        auxpref = ctx.insn.auxpref
        postfix = ''

        # Adjust Register
        #   e.g., LDSI, LDSD
        adjust_flag = (auxpref & self.FL_ADJUST) >> 5
        if adjust_flag == 1:
            postfix += 'i'
        elif adjust_flag == 2:
            postfix += 'd'

        # Conditional
        #   e.g., Bge
        cc_idx = auxpref & self.FL_CC
        if cc_idx != 0xf:
            postfix += self.cc_table[cc_idx]

        # Update Flag
        #   e.g., ad.
        if auxpref & self.FL_UF != 0:
            postfix += '.'

        ctx.out_mnem(12, postfix)

        for i in range(6):
            if ctx.insn[i].type == o_void:
                break
            if i > 0:
                ctx.out_symbol(',')
                ctx.out_char(' ')
            ctx.out_one_operand(i)

        # NOTE(review): gl_comm comes from the pre-7.0 output API; confirm
        # it is still honoured by the targeted IDA version.
        cvar.gl_comm = 1
        ctx.flush_outbuf()
        return True

    def ev_out_operand(self, ctx, op):
        """Print one operand; delegates to the module-level ``outop``."""
        self._reload_if_requested()
        return getattr(self.module, 'outop')(self, ctx, op)

########################################
# Data format for TriBytes (9bits)
########################################
class tribyte_data_type(data_type_t):
    """Custom data type ``py_tribyte``: a value spanning three 9-bit bytes."""

    ASM_KEYWORD = ".tri"

    def __init__(self):
        # Positional arguments, in order: internal name, value size,
        # menu name, hotkey, assembler keyword.
        data_type_t.__init__(
            self,
            "py_tribyte",
            1,
            "TriBytes (9bits)",
            'z',
            tribyte_data_type.ASM_KEYWORD,
        )

    def calc_item_size(self, ea, maxsize):
        """Every item occupies exactly 3 addressable units."""
        return 3

class tribyte_data_format(data_format_t):
    """Display format for ``py_tribyte`` items: one hexadecimal number."""

    def __init__(self):
        data_format_t.__init__(self,
                               "py_tribyte_format",
                               0,
                               "TriBytes (9bits)")

    def printf(self, value, current_ea, operand_num, dtid):
        """Return the hex text of the 27-bit value stored at ``current_ea``.

        The three 9-bit bytes are combined in the same swapped order used
        for instructions: second byte highest, then first, then third.
        """
        # get_wide_byte comes from ``from idaapi import *``; the name
        # ``idaapi`` itself is never imported by this file.
        b1 = get_wide_byte(current_ea) & 0x1ff
        b2 = get_wide_byte(current_ea + 1) & 0x1ff
        b3 = get_wide_byte(current_ea + 2) & 0x1ff
        return hex((b2 << 18) + (b1 << 9) + b3)

class nbit_str_data_type(data_type_t):
    """Custom data type ``py_str``: a string stored one character per 9-bit byte."""

    ASM_KEYWORD = ".str"

    def __init__(self):
        data_type_t.__init__(self,
                             "py_str",
                             1,
                             "String (9bits)",
                             ',',
                             nbit_str_data_type.ASM_KEYWORD)

    def calc_item_size(self, ea, maxsize):
        """Return the string length at ``ea`` including its terminator.

        Characters 0x20..0x7f plus CR, LF, TAB and ESC belong to the
        string; the first other value ends it and is counted as the
        terminator.  ``maxsize`` is not consulted.
        """
        length = 0
        while True:
            # get_wide_byte comes from ``from idaapi import *``; the name
            # ``idaapi`` itself is never imported by this file.
            c = get_wide_byte(ea + length) & 0x1ff
            printable = 0x20 <= c <= 0x7f
            if not printable and c not in (0x0d, 0x0a, 0x09, 0x1b):
                break
            length += 1

        return length + 1

class nbit_str_data_format(data_format_t):
    """Display format for ``py_str`` items: a quoted string followed by ``, 0``."""

    # Control characters shown in a visible form.
    _ESCAPES = {0x0d: '\\r', 0x0a: '\\n', 0x09: '\\t', 0x1b: '^['}

    def __init__(self):
        data_format_t.__init__(self,
                               "py_str_format",
                               0,
                               "String (9bits) format")

    def printf(self, value, current_ea, operand_num, dtid):
        """Return the text of the string item at ``current_ea``.

        ``len(value) - 1`` characters are read (the last unit is the
        terminator).  CR, LF and TAB are shown as ``\\r``, ``\\n`` and
        ``\\t``, ESC as ``^[``; TAB used to be emitted as a raw tab
        character, unlike the other escapes.
        """
        pieces = []
        for i in range(len(value) - 1):
            # get_wide_byte comes from ``from idaapi import *``; the name
            # ``idaapi`` itself is never imported by this file.
            c = get_wide_byte(current_ea + i) & 0xff
            pieces.append(self._ESCAPES.get(c, chr(c)))
        return '"%s", 0' % ''.join(pieces)

########################################
# Processor Plugin Entry
########################################
def PROCESSOR_ENTRY():
    """Entry point IDA calls to obtain the processor module instance.

    The directory containing this script is put at the front of
    ``sys.path`` before the ``CLEMENCY`` object is created.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    sys.path.insert(0, here)
    return CLEMENCY()
	from idaapi import *
	import os
	import sys
	import re
	from clemency_inst import inst_json

	########################################
	# Decoder Function
	########################################
	def is_bit_string(strg, search=re.compile(r'[^01]').search):
	    """Tell whether ``strg`` consists solely of the characters '0' and '1'.

	    ``search`` is the bound ``search`` method of a pattern matching any
	    character other than '0' or '1'.  An empty string contains no
	    offending character and therefore yields ``True``.
	    """
	    # Body indentation restored; the flattened original did not parse.
	    return not bool(search(strg))

	def SIGNEXT(x, b):
	    """Sign-extend the low ``b`` bits of ``x`` into a Python integer.

	    Bits of ``x`` above bit ``b - 1`` are discarded; bit ``b - 1`` is the
	    sign bit of a two's-complement value.
	    """
	    # Body indentation restored; the flattened original did not parse.
	    m = 1 << (b - 1)
	    x = x & ((1 << b) - 1)
	    return (x ^ m) - m

	def fetch(code, n):
	    """Reassemble the first ``n`` bits of an instruction from ``code``.

	    ``code`` is a 54-bit window holding six 9-bit bytes, the first byte in
	    the most significant position.  The bytes are put back in the swapped
	    order listed below for each supported length (18, 27, 36 or 54 bits).
	    Any other ``n`` yields ``None``.
	    """
	    # Body indentation restored; the flattened original did not parse.
	    # nine_bit[k] is the k-th byte of the window (1-based, index 0 unused).
	    nine_bit = [None] + [(code >> (54 - 9 * k)) & 0x1ff for k in range(1, 7)]
	    byte_order = {
	        18: (2, 1),
	        27: (2, 1, 3),
	        36: (2, 1, 3, 4),
	        54: (2, 1, 3, 5, 4, 6),
	    }
	    order = byte_order.get(n)
	    if order is None:
	        return None
	    word = 0
	    for position in order:
	        word = (word << 9) | nine_bit[position]
	    return word

	def calc_jump_addr(self, op, insn):
	    """Compute the branch/call target encoded by operand ``op`` of ``insn``.

	    * ``BRA`` / ``CAA``: ``op.addr`` is returned unchanged.
	    * ``C`` / ``B``: ``op.addr`` is a 17-bit two's-complement offset from
	      ``insn.ea``.
	    * ``CAR`` / ``BRR``: ``op.addr`` is a 27-bit two's-complement offset
	      from ``insn.ea``.
	    * anything else: ``(op.addr + insn.ea)`` truncated to 27 bits.
	    """
	    # Nesting restored; the flattened original did not parse.
	    raw = op.addr
	    kind = insn.itype
	    if kind == self.itype_BRA or kind == self.itype_CAA:
	        return raw

	    if kind == self.itype_C or kind == self.itype_B:
	        sign_bit, field_mask = 0x10000, 0x1ffff
	    elif kind == self.itype_CAR or kind == self.itype_BRR:
	        sign_bit, field_mask = 0x4000000, 0x7ffffff
	    else:
	        return (raw + insn.ea) & 0x7ffffff

	    if raw & sign_bit:
	        # Negative offset: subtract its magnitude (two's complement).
	        return insn.ea - ((~raw & field_mask) + 1)
	    return raw + insn.ea

	########################################
	# Processor Type
	########################################

	def ana_ops(self, ops, insn):
	    """Fill in the operands and ``auxpref`` flags of ``insn``.

	    ``self.itable[insn.itype].args`` is a list of ``(width, value)``
	    pairs.  Entries whose value starts with '0' or '1' are constant
	    opcode bits and are skipped.  Every other entry consumes the next
	    element of ``ops`` and is handled by kind:

	    * ``rA``/``rB``/``rC``  -> register operand
	    * ``Immediate``         -> immediate operand
	    * ``Location``/``Offset`` -> near (code) operand
	    * ``Register Count``    -> turns the previous operand into a
	      displacement operand, using the preceding field as base register
	      and the field two positions later as a signed 27-bit displacement
	    * ``Adjust rB``, ``UF``, ``Condition`` -> bits 5-6, bit 4 and bits
	      0-3 of ``insn.auxpref``
	    * ``Memory Flags``      -> processor-specific operand
	    * ``Memory Offset``     -> nothing

	    Without a ``Condition`` field the condition bits are set to 0xF.
	    """
	    # Nesting restored and the markdown-escaped ``\|=`` operators
	    # un-escaped; the flattened original did not parse.
	    spec = self.itable[insn.itype]
	    slot = 0            # next operand of insn to fill
	    field = 0           # index into ops for the current argument
	    has_condition = False
	    for _width, kind in spec.args:
	        if kind[0] in ('0', '1'):
	            continue
	        if kind in ('rA', 'rB', 'rC'):
	            operand = insn[slot]
	            operand.type = o_reg
	            operand.dtype = dt_dword
	            operand.reg = ops[field]
	            slot += 1
	        elif kind == 'Immediate':
	            operand = insn[slot]
	            operand.type = o_imm
	            operand.dtype = dt_dword
	            operand.value = ops[field]
	            slot += 1
	        elif kind in ('Location', 'Offset'):
	            operand = insn[slot]
	            operand.type = o_near
	            operand.dtype = dt_dword
	            operand.addr = ops[field]
	            slot += 1
	        elif kind == 'Register Count':
	            # Rewrites the operand emitted just before this field.
	            memory = insn[slot - 1]
	            memory.type = o_displ
	            memory.dtype = dt_dword
	            memory.specval = ops[field]
	            memory.phrase = ops[field - 1]
	            displacement = ops[field + 2]
	            if displacement & 0x4000000:
	                # bit 26 set: negative 27-bit displacement
	                displacement -= 0x8000000
	            memory.value = displacement
	        elif kind == 'Adjust rB':
	            insn.auxpref |= ops[field] << 5
	        elif kind == 'UF':
	            insn.auxpref |= ops[field] << 4
	        elif kind == 'Condition':
	            insn.auxpref |= ops[field]
	            has_condition = True
	        elif kind == 'Memory Flags':
	            operand = insn[slot]
	            operand.type = o_idpspec0
	            operand.dtype = dt_dword
	            operand.specval = ops[field]
	            slot += 1
	        elif kind == 'Memory Offset':
	            pass
	        else:
	            assert False
	        field += 1
	    if not has_condition:
	        insn.auxpref |= 0xF

	def ana(self, insn):
	    """Decode the instruction at ``insn.ea + insn.size`` into ``insn``.

	    Six 9-bit bytes are read and packed into one 54-bit window.  Each
	    encoding group in ``self.imatch`` is tried in iteration order; the
	    first group whose constant bits form a known key selects the
	    instruction type, and its operand masks provide the field values
	    handed to ``ana_ops``.

	    Two fix-ups follow: an ``MH`` whose preceding 3-byte word has 0x12 in
	    bits 22-26 and the same register in bits 17-21 becomes the
	    pseudo-instruction ``MEH``; an ``MS`` immediate with bit 16 set is
	    made negative (17-bit sign extension).

	    Returns the instruction length in bytes (also added to
	    ``insn.size``), or 0 when no encoding matches.
	    """
	    # Nesting restored and the markdown-escaped ``\|`` un-escaped; the
	    # flattened original did not parse.
	    start = insn.ea + insn.size
	    window = 0
	    for k in range(6):
	        window = (window << 9) | (get_wide_byte(start + k) & 0x1ff)

	    matched = None
	    for group in self.imatch:
	        bitlen, masks = group
	        ops, imap = self.imatch[group]
	        code2 = fetch(window, bitlen)
	        key = tuple((code2 & mask) >> shift for mask, shift in masks)
	        if key in imap:
	            matched = imap[key]
	            insn.itype = matched
	            break
	    if matched is None:
	        return 0
	    ana_ops(self, [(code2 & mask) >> shift for mask, shift in ops], insn)

	    # Remove this block to disable simplifying ML+MH
	    if insn.itype == self.itype_MH and insn.ea >= 3:
	        prev_ea = insn.ea - 3
	        # Same byte swap as fetch(): second byte, first byte, third byte.
	        prev = (((get_wide_byte(prev_ea + 1) & 0x1ff) << 18)
	                | ((get_wide_byte(prev_ea) & 0x1ff) << 9)
	                | (get_wide_byte(prev_ea + 2) & 0x1ff))
	        if (prev >> 22) & 0x1f == 0x12:
	            prev_reg = (prev >> 17) & 0x1f
	            low_part = prev & 0x1ffff
	            if prev_reg == insn[0].reg:
	                insn[1].value = (insn[1].value << 10) | (low_part & 0x3ff)
	                insn.itype = self.itype_MEH

	    # ms 0x1ffff -> ms -1
	    if insn.itype == self.itype_MS and (insn[1].value & 0x10000):
	        insn[1].value -= 0x20000

	    length = bitlen // 9
	    insn.size += length
	    return length

	def add_stkpnt(self, pfn, v):
	    """Register stack-pointer change ``v`` at the end of ``self.cmd``.

	    Nothing happens when ``pfn`` is falsy, or when ``is_fixed_spd``
	    reports true for the address right after the current instruction.
	    """
	    # Nesting restored; the flattened original did not parse.
	    if not pfn:
	        return
	    after = self.cmd.ea + self.cmd.size
	    if is_fixed_spd(after):
	        return
	    add_auto_stkpnt2(pfn, after, v)
	    print(hex(after), v)

	def trace_sp(self):
	    """Record stack adjustments made by ``SBI``/``ADI`` on the ST register.

	    Works on ``self.cmd``.  When the instruction lies inside a function
	    and both its first and second operands are the ST register, an
	    ``SBI`` records minus the sign-extended 7-bit third operand and an
	    ``ADI`` records that value unchanged, both through ``add_stkpnt``.
	    """
	    # Nesting restored; the flattened original did not parse.
	    cmd = self.cmd
	    pfn = get_func(cmd.ea)
	    if not pfn:
	        return
	    if not (cmd.Op1.type == o_reg and cmd.Op1.reg == self.ireg_ST):
	        return
	    if not (cmd.Op2.type == o_reg and cmd.Op2.reg == self.ireg_ST):
	        return
	    if cmd.itype == self.itype_SBI:
	        add_stkpnt(self, pfn, -SIGNEXT(cmd.Op3.value, 7))
	    elif cmd.itype == self.itype_ADI:
	        add_stkpnt(self, pfn, SIGNEXT(cmd.Op3.value, 7))

	def emu(self, insn):
	    """Emulate ``insn``: record its code and data cross-references.

	    * ``B``/``BRA``/``BRR`` get a jump xref to ``calc_jump_addr``'s
	      result; ``BR`` (register target) gets none.
	    * ``C``/``CAR``/``CAA`` get a call xref; every call, including
	      ``CR``, also flows into the next instruction.
	    * ``RE`` and ``HT`` end the flow; every other instruction flows on.
	    * ``MEH`` additionally gets a data xref to its immediate and, when
	      the first three 9-bit bytes there are in 0x20..0x7f, a string item
	      is created at that address.

	    All xrefs originate at ``insn.ea``: ``add_cref``/``add_dref`` take
	    ``(from, to, type)``.  Always returns ``True``.
	    """
	    # Nesting restored; the flattened original did not parse.
	    aux = self.get_auxpref(insn)
	    next_ea = insn.ea + insn.size

	    flow = False
	    if insn.itype in [self.itype_B, self.itype_BR, self.itype_BRA, self.itype_BRR]:
	        if insn.itype != self.itype_BR:
	            add_cref(insn.ea, calc_jump_addr(self, insn.Op1, insn), fl_JN)
	        # NOTE(review): as written BRA/BRR always keep falling through;
	        # confirm that this is intended.
	        if insn.itype not in [self.itype_B, self.itype_BR] or (aux & 0xF) != 0xF:
	            flow = True
	    elif insn.itype in [self.itype_C, self.itype_CR, self.itype_CAR, self.itype_CAA]:
	        if insn.itype != self.itype_CR:
	            add_cref(insn.ea, calc_jump_addr(self, insn.Op1, insn), fl_CN)
	        add_cref(insn.ea, next_ea, fl_F)
	    elif insn.itype in [self.itype_RE, self.itype_HT]:
	        pass
	    else:
	        flow = True

	    if flow:
	        add_cref(insn.ea, next_ea, fl_F)

	    if insn.itype == self.itype_MEH:
	        target = insn[1].value
	        add_dref(insn.ea, target, dr_R)
	        first_chars = [get_wide_byte(target + k) & 0x1ff for k in range(3)]
	        if all(0x20 <= c <= 0x7f for c in first_chars):
	            # NOTE(review): ported from MakeCustomDataEx(ea, 0, dtid,
	            # dfid); confirm create_data accepts these ids here.
	            create_data(target, 0, self.nstr_dtid, self.nstr_dfid)

	    # Stack-pointer tracing (trace_sp / recalc_spd) is currently disabled.

	    return True

	def outop(self, ctx, op):
	    """Print operand ``op`` of ``ctx.insn`` through output context ``ctx``.

	    * register operand: its name from ``self.reg_names``;
	    * ``o_idpspec0`` (memory flags): ``N``, ``R``, ``RW`` or ``E`` for
	      values 0-3, ``ERR`` for anything else;
	    * immediate: signed 32-bit value;
	    * near operand: for every instruction except ``BRA``/``CAA`` the
	      signed offset followed by the target in parentheses; for
	      ``BRA``/``CAA`` just the target.  An unnamed target is printed as
	      an error-coloured address and logged with ``remember_problem``;
	    * displacement operand: ``[reg+disp, count]`` where count is
	      ``specval + 1``.

	    Always returns ``True``.
	    """
	    # Nesting restored, the markdown-escaped ``\|`` un-escaped, and the
	    # bare out_tagoff() call replaced by ctx.out_tagoff().
	    optype = op.type
	    if optype == o_reg:
	        ctx.out_register(self.reg_names[op.reg])
	    elif optype == o_idpspec0:
	        flag_text = {0: 'N', 1: 'R', 2: 'RW', 3: 'E'}.get(op.specval, 'ERR')
	        for ch in flag_text:
	            ctx.out_symbol(ch)
	    elif optype == o_imm:
	        ctx.out_value(op, OOFW_32 | OOF_SIGNED)
	    elif optype == o_near:
	        insn = ctx.insn
	        raw = op.addr
	        # Same target computation that emu() uses for its xrefs.
	        target = calc_jump_addr(self, op, insn)
	        relative = insn.itype != self.itype_BRA and insn.itype != self.itype_CAA
	        if relative:
	            if insn.itype == self.itype_C or insn.itype == self.itype_B:
	                sign_bit, field_mask = 0x10000, 0x1ffff
	            elif insn.itype == self.itype_CAR or insn.itype == self.itype_BRR:
	                sign_bit, field_mask = 0x4000000, 0x7ffffff
	            else:
	                # No signed form: the offset is always shown as positive.
	                sign_bit, field_mask = 0, 0
	            if raw & sign_bit:
	                ctx.out_symbol('-')
	                ctx.out_btoa((~raw & field_mask) + 1, 16)
	            else:
	                ctx.out_symbol('+')
	                ctx.out_btoa(raw, 16)
	            ctx.out_symbol(' ')
	            ctx.out_symbol('(')

	        if not ctx.out_name_expr(op, target, BADADDR):
	            ctx.out_tagon(COLOR_ERROR)
	            ctx.out_value(op, OOF_ADDR)
	            ctx.out_tagoff(COLOR_ERROR)
	            remember_problem(Q_noName, insn.ea)

	        if relative:
	            ctx.out_symbol(')')
	    elif optype == o_displ:
	        ctx.out_symbol('[')
	        ctx.out_register(self.reg_names[op.phrase])
	        ctx.out_value(op, OOFW_32 | OOFS_NEEDSIGN | OOF_SIGNED)
	        ctx.out_symbol(',')
	        ctx.out_symbol(' ')
	        ctx.out_line("%d" % (op.specval + 1))
	        ctx.out_symbol(']')

	    return True

	class CLEMENCY(processor_t):
	    """IDA processor module class for cLEMENCy (9-bit bytes).

	    The decoding, emulation and operand-printing logic lives in the
	    module-level functions ``ana``, ``emu`` and ``outop``.  The ``ev_*``
	    hooks look them up on ``self.module`` at call time, so that setting
	    the ``IDA_RELOAD`` environment variable makes every hook re-import
	    the module first.

	    Class structure restored and markdown-escaped ``\\|`` un-escaped; the
	    flattened original did not parse.
	    """

	    # IDP id ( Numbers above 0x8000 are reserved for the third-party modules)
	    id = 0x8000 + 0x999
	    # Processor features
	    flag = PR_ADJSEGS | PRN_HEX | PR_WORD_INS
	    # Number of bits in a byte for code segments (usually 8)
	    cnbits = 9
	    # Number of bits in a byte for non-code segments (usually 8)
	    dnbits = 9
	    # short processor names; each should be shorter than 9 characters
	    psnames = ["clemency"]
	    # long processor names; no restriction on name lengths
	    plnames = ["cLEMENCy"]

	    segreg_size = 0

	    # icode of the first instruction
	    instruc_start = 0

	    assembler = {
	        'flag': ASH_HEXF3 | ASD_DECF0 | ASO_OCTF1 | ASB_BINF3 | AS_ASCIIC,  # | AS_ASCIIZ
	        "uflag": 0,
	        "name": "GNU assembler",

	        "origin": ".org",
	        "end": "end",
	        "cmnt": ";",

	        "ascsep": '"',
	        "accsep": "'",
	        "esccodes": "\"'",

	        "a_ascii": ".ascii",
	        "a_byte": ".byte",
	        "a_word": ".word",
	        "a_3byte": ".tribyte",

	        "a_bss": "dfs %s",

	        "a_seg": "seg",
	        "a_curip": ".",
	        "a_public": "",
	        "a_weak": "",
	        "a_extrn": ".extrn",
	        "a_comdef": "",
	        "a_align": ".align",

	        "lbrace": "(",
	        "rbrace": ")",
	        "a_mod": "%",
	        "a_band": "&",
	        "a_bor": "|",
	        "a_xor": "^",
	        "a_bnot": "~",
	        "a_shl": "<<",
	        "a_shr": ">>",
	        "a_sizeof_fmt": "size %s",
	    }

	    # Layout of insn.auxpref (filled in by ana_ops)
	    FL_UF = 0x0010      # update-flags bit, printed as a trailing '.'
	    FL_CC = 0x000F      # condition code, index into cc_table (0xF = none)
	    FL_ADJUST = 0x0060  # register adjust mode, printed as 'i' / 'd'

	    # Mnemonic suffix for each condition code.
	    cc_table = [
	        'n',
	        'e',
	        'l',
	        'le',
	        'g',
	        'ge',
	        'no',
	        'o',
	        'ns',
	        's',
	        'sl',
	        'sle',
	        'sg',
	        'sge',
	        '',
	        '',
	    ]

	    # The module holding ana/emu/outop; re-imported on demand.
	    module = __import__('clemency')

	    def __init__(self):
	        processor_t.__init__(self)
	        # new data format
	        self.init_data_format()
	        # reload debug flag
	        self.doReload = os.getenv('IDA_RELOAD')
	        # init
	        self._init_registers()
	        self._init_instructions()

	    def init_data_format(self):
	        """Register the custom tribyte and 9-bit string data types/formats."""
	        self.tribyte_dtid = register_custom_data_type(tribyte_data_type())
	        self.tribyte_dfid = register_custom_data_format(tribyte_data_format())
	        self.nstr_dtid = register_custom_data_type(nbit_str_data_type())
	        self.nstr_dfid = register_custom_data_format(nbit_str_data_format())

	    def _init_registers(self):
	        """Define the register names and the ``ireg_<NAME>`` constants."""
	        general = ["R%02d" % n for n in range(29)]
	        self.reg_names = general + ["ST", "RA", "PC", "FL"] + ["CS", "DS"]

	        # Create the ireg_XXXX constants
	        for index, reg_name in enumerate(self.reg_names):
	            setattr(self, 'ireg_' + reg_name, index)

	        # Set fake segment registers
	        self.reg_first_sreg = self.reg_code_sreg = self.ireg_CS
	        self.reg_last_sreg = self.reg_data_sreg = self.ireg_DS

	    def _init_instructions(self):
	        """Build the instruction tables from ``inst_json``.

	        * ``self.itable[j]``: description of instruction ``j``.
	        * ``self.imatch``: maps ``(total bits, constant-field masks)`` to
	          ``(operand-field masks, {constant values: j})``; each mask is
	          a ``(bit mask, shift)`` pair.
	        * ``self.instruc`` / ``itype_<NAME>``: the list handed to IDA and
	          the matching index constants, plus the pseudo-instruction
	          ``meh``.
	        """
	        class idef:
	            def __init__(self, name, cmt, fmt, cf, args):
	                self.name = name
	                self.cmt = cmt
	                self.fmt = fmt
	                self.cf = cf
	                self.args = args

	        self.itable = {}
	        self.imatch = {}

	        for j, entry in enumerate(inst_json):
	            args = [(a['width'], a['value']) for a in entry['args']]

	            # Set itable entry for instruction #j
	            self.itable[j] = idef(entry['name'], entry['desc'], entry['format'],
	                                  entry['feature'], args)

	            # Generate matching table entry
	            total_bits = sum(w for w, _ in args)
	            const_masks = []
	            const_vals = []
	            operand_masks = []
	            consumed = 0
	            for w, v in args:
	                shift = total_bits - consumed - w
	                field = (((1 << w) - 1) << shift, shift)
	                if v[0] in '01':
	                    const_masks.append(field)
	                    const_vals.append(int(v, 2))
	                else:
	                    operand_masks.append(field)
	                consumed += w

	            grp = (total_bits, tuple(const_masks))
	            if grp not in self.imatch:
	                # (operand mask, inst match table)
	                self.imatch[grp] = (tuple(operand_masks), {})
	            self.imatch[grp][1][tuple(const_vals)] = j

	        Instructions = []
	        for j in range(len(self.itable)):
	            x = self.itable[j]
	            d = dict(name=x.name.lower(), feature=x.cf)
	            if x.cmt:
	                d['cmt'] = x.cmt
	            Instructions.append(d)
	            setattr(self, 'itype_' + x.name, j)

	        # Pseudo-instruction produced by ana() when it merges ML+MH.
	        setattr(self, 'itype_MEH', len(Instructions))
	        Instructions.append(dict(name='meh', feature=0))

	        self.instruc_end = len(Instructions) + 1
	        self.instruc = Instructions
	        self.icode_return = self.itype_RE

	    def _reload_if_requested(self):
	        """Re-import ``self.module`` when the IDA_RELOAD flag is set.

	        ``reload`` is a builtin only on Python 2; on Python 3 it lives in
	        ``importlib``.  Both are supported.
	        """
	        if not self.doReload:
	            return
	        try:
	            from importlib import reload as do_reload
	        except ImportError:
	            do_reload = reload  # Python 2 builtin
	        do_reload(self.module)

	    def ev_ana_insn(self, insn):
	        """Decode one instruction; delegates to the module-level ``ana``."""
	        self._reload_if_requested()
	        return getattr(self.module, 'ana')(self, insn)

	    def ev_emu_insn(self, insn):
	        """Emulate one instruction; delegates to the module-level ``emu``."""
	        self._reload_if_requested()
	        return getattr(self.module, 'emu')(self, insn)

	    def ev_out_insn(self, ctx):
	        """Print the mnemonic (with suffixes) and operands of ``ctx.insn``."""
	        auxpref = ctx.insn.auxpref
	        postfix = ''

	        # Adjust Register, e.g., LDSI, LDSD
	        adjust_flag = (auxpref & self.FL_ADJUST) >> 5
	        if adjust_flag == 1:
	            postfix += 'i'
	        elif adjust_flag == 2:
	            postfix += 'd'

	        # Conditional, e.g., Bge
	        cc_idx = auxpref & self.FL_CC
	        if cc_idx != 0xf:
	            postfix += self.cc_table[cc_idx]

	        # Update Flag, e.g., ad.
	        if auxpref & self.FL_UF != 0:
	            postfix += '.'

	        ctx.out_mnem(12, postfix)

	        for i in range(6):
	            if ctx.insn[i].type == o_void:
	                break
	            if i > 0:
	                ctx.out_symbol(',')
	                ctx.out_char(' ')
	            ctx.out_one_operand(i)

	        # NOTE(review): gl_comm comes from the pre-7.0 output API;
	        # confirm it is still honoured by the targeted IDA version.
	        cvar.gl_comm = 1
	        ctx.flush_outbuf()
	        return True

	    def ev_out_operand(self, ctx, op):
	        """Print one operand; delegates to the module-level ``outop``."""
	        self._reload_if_requested()
	        return getattr(self.module, 'outop')(self, ctx, op)

	########################################
	# Data format for TriBytes (9bits)
	########################################
	class tribyte_data_type(data_type_t):
	    """Custom data type ``py_tribyte``: a value spanning three 9-bit bytes.

	    Class structure restored; the flattened original did not parse.
	    """

	    ASM_KEYWORD = ".tri"

	    def __init__(self):
	        data_type_t.__init__(self,
	                             "py_tribyte",
	                             1,
	                             "TriBytes (9bits)",
	                             'z',
	                             tribyte_data_type.ASM_KEYWORD)

	    def calc_item_size(self, ea, maxsize):
	        """Every item occupies exactly 3 addressable units."""
	        return 3

	class tribyte_data_format(data_format_t):
	    """Display format for ``py_tribyte`` items: one hexadecimal number.

	    Class structure restored; the flattened original did not parse.
	    """

	    def __init__(self):
	        data_format_t.__init__(self,
	                               "py_tribyte_format",
	                               0,
	                               "TriBytes (9bits)")

	    def printf(self, value, current_ea, operand_num, dtid):
	        """Return the hex text of the 27-bit value stored at ``current_ea``.

	        The three 9-bit bytes are combined in the swapped order used for
	        instructions: second byte highest, then first, then third.
	        """
	        # get_wide_byte comes from ``from idaapi import *``; the name
	        # ``idaapi`` itself is never imported by this file.
	        b1 = get_wide_byte(current_ea) & 0x1ff
	        b2 = get_wide_byte(current_ea + 1) & 0x1ff
	        b3 = get_wide_byte(current_ea + 2) & 0x1ff
	        return hex((b2 << 18) + (b1 << 9) + b3)

	class nbit_str_data_type(data_type_t):
	    """Custom data type ``py_str``: one character per 9-bit byte.

	    Class structure restored; the flattened original did not parse.
	    """

	    ASM_KEYWORD = ".str"

	    def __init__(self):
	        data_type_t.__init__(self,
	                             "py_str",
	                             1,
	                             "String (9bits)",
	                             ',',
	                             nbit_str_data_type.ASM_KEYWORD)

	    def calc_item_size(self, ea, maxsize):
	        """Return the string length at ``ea`` including its terminator.

	        Characters 0x20..0x7f plus CR, LF, TAB and ESC belong to the
	        string; the first other value ends it and is counted as the
	        terminator.  ``maxsize`` is not consulted.
	        """
	        length = 0
	        while True:
	            # get_wide_byte comes from ``from idaapi import *``; the
	            # name ``idaapi`` itself is never imported by this file.
	            c = get_wide_byte(ea + length) & 0x1ff
	            printable = 0x20 <= c <= 0x7f
	            if not printable and c not in (0x0d, 0x0a, 0x09, 0x1b):
	                break
	            length += 1

	        return length + 1

	class nbit_str_data_format(data_format_t):
	    """Display format for ``py_str`` items: a quoted string plus ``, 0``.

	    Class structure restored; the flattened original did not parse.
	    """

	    # Control characters shown in a visible form.
	    _ESCAPES = {0x0d: '\\r', 0x0a: '\\n', 0x09: '\\t', 0x1b: '^['}

	    def __init__(self):
	        data_format_t.__init__(self,
	                               "py_str_format",
	                               0,
	                               "String (9bits) format")

	    def printf(self, value, current_ea, operand_num, dtid):
	        """Return the text of the string item at ``current_ea``.

	        ``len(value) - 1`` characters are read (the last unit is the
	        terminator).  CR, LF and TAB are shown as ``\\r``, ``\\n`` and
	        ``\\t``, ESC as ``^[``; TAB used to be emitted as a raw tab
	        character, unlike the other escapes.
	        """
	        pieces = []
	        for i in range(len(value) - 1):
	            # get_wide_byte comes from ``from idaapi import *``; the
	            # name ``idaapi`` itself is never imported by this file.
	            c = get_wide_byte(current_ea + i) & 0xff
	            pieces.append(self._ESCAPES.get(c, chr(c)))
	        return '"%s", 0' % ''.join(pieces)

	########################################
	# Processor Plugin Entry
	########################################
	def PROCESSOR_ENTRY():
	    """Entry point IDA calls to obtain the processor module instance.

	    The directory containing this script is put at the front of
	    ``sys.path`` before the ``CLEMENCY`` object is created.
	    """
	    # Body indentation restored; the flattened original did not parse.
	    # add proc into module path
	    script_path = os.path.abspath(__file__)
	    script_dir = os.path.dirname(script_path)
	    sys.path.insert(0, script_dir)
	    return CLEMENCY()