bruce30262/disasm.py Secret

## disasm.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from pwn import *
from collections import OrderedDict
import Queue
import json
import inst
import sys

# dump1: memory image of the subleq vm
data = ""
with open("./dump1", "rb") as f:
    data = f.read()
# Everything below assumes an image of exactly 0x8400 bytes.
assert len(data) == 0x8400

# Work list of instruction indices that still have to be decoded.
q = Queue.Queue()
# Decoded instructions, keyed by instruction index.
code = {}

def Word(addr):
    """Return the unsigned 16-bit little-endian word at byte offset ``addr``.

    The bytes are taken from the module-level ``data`` image.  The value is
    decoded with an explicit ``<H`` struct format rather than pwntools'
    ``u16`` so the result cannot change with pwntools' global ``context``.

    Raises struct.error when fewer than two bytes are available at ``addr``.
    """
    import struct  # local import: the file's import block stays untouched

    # Slicing (rather than unpack_from) keeps short reads an error, exactly
    # like decoding a truncated slice with u16 did.
    return struct.unpack("<H", data[addr:addr + 2])[0]

def add_next_pos(addr):
    """Queue instruction index ``addr`` for decoding unless already decoded.

    An index counts as decoded when it is a key of the module-level ``code``
    dict; any other index is appended to the work queue ``q``.
    """
    if addr in code:
        return
    q.put(addr)

def subleq(now, f1, f2, f3):
    """Decode one subleq instruction and record it in ``code``.

    now: word index of the instruction (masked to 16 bits).
    f1, f2: word indices of the two operands.
    f3: word index of the branch target; 0 means "no branch".  The caller
        reads it from the instruction's third word, so it is used directly
        instead of being fetched from memory again.

    ``code[now]`` receives a JSON string with the keys ``op1``/``op2``
    (operand byte addresses), ``jmp_type`` ("unc", "con" or "next"), ``asm``
    (display text) and, for branches, ``jmp``.  Every successor index is
    passed to add_next_pos().  All addresses in ``asm`` are byte addresses.
    """
    def byte_addr(index):
        # Word index -> byte address in the dump, with 16-bit wrap-around.
        return (0x223 + ((index << 1) & 0xffff)) & 0xffff

    now &= 0xffff
    idx1 = byte_addr(f1)
    idx2 = byte_addr(f2)
    fall_through = now + 3  # an instruction occupies three words

    record = {"op1": idx1, "op2": idx2}
    asm = "subleq [{:#x}] [{:#x}] [{:#x}]".format(idx1, idx2, byte_addr(f3))

    if not f3:
        record['jmp_type'] = "next"  # no branch: continue with the next one
        successors = [fall_through]
    elif idx1 == idx2:
        # Same cell as both operands: treated as an unconditional jump.
        record['jmp_type'] = "unc"
        record['jmp'] = f3
        asm += "; JMP {:#x}".format(byte_addr(f3))
        successors = [f3]
    else:
        record['jmp_type'] = "con"  # conditional
        record['jmp'] = (f3, fall_through)  # (taken, not taken)
        asm += "; if [{:#x}] <= 0; JMP {:#x}; else {:#x}".format(
            idx2, byte_addr(f3), byte_addr(fall_through))
        successors = [f3, fall_through]

    record['asm'] = asm
    # Store before queueing so a jump to itself is not queued again.
    code[now] = json.dumps(record)
    for nxt in successors:
        add_next_pos(nxt)

# starting point of the disassembler: a hex word index on the command line
if len(sys.argv) < 2:
    sys.exit("usage: {} <start index (hex)>".format(sys.argv[0]))
start = int(sys.argv[1], 16)
q.put(start)

# Work-list traversal: decode every instruction reachable from `start`.
while not q.empty():
    now = q.get()
    if (now & 0xffff) in code:
        # The same index can be queued several times before it is decoded;
        # decoding it again would only rebuild the identical record.
        continue
    cur = (now << 1) & 0xffff
    # The three consecutive words of the instruction, read from byte
    # address 0x223 + 2 * index onwards.
    f1 = Word((0x223 + cur) & 0xffff)
    f2 = Word((0x223 + cur + 2) & 0xffff)
    f3 = Word((0x223 + cur + 4) & 0xffff)
    subleq(now, f1, f2, f3)

# Decoded instructions ordered by word index.
od = OrderedDict(sorted(code.items()))
black = [0xff, 0x102, 0x105] # black list of the disassembling address
# The loop variable is deliberately not called `code`: that name is the dict
# of decoded instructions and must not be rebound to one of its values.
# .items() (instead of .iteritems()) iterates the same pairs on Python 2 and 3.
dis = [(pos, entry) for pos, entry in od.items() if pos not in black]

# token: (kind, value) -> letter; rev_token: letter -> value.
token, rev_token = dict(), dict()
# Code point of the next letter to hand out.  Starts at 'a' so trans() also
# works before any caller has reset the state.
cur_token = ord('a')
Z = '0x223'
def trans(code):
    """Return the symbolic token tuple for one decoded subleq record.

    code: dict with the keys ``op1``, ``op2`` and ``jmp_type``, plus ``jmp``
        when ``jmp_type`` is "unc".

    Operand address 0x223 always becomes ``Z``; every other distinct value
    gets the next unused letter ('a', 'b', ...) and is remembered in the
    module-level ``token``/``rev_token`` dicts.  Returns ``(tk1, tk2)``, or
    ``(tk1, tk2, tk3)`` for an unconditional jump, where tk3 names the target.
    """
    def get_token(op, kind="mem"):
        global cur_token
        # Only an operand address can be Z.  A jump target is a word index,
        # so the number 0x223 means something different there.
        if kind == "mem" and op == 0x223:
            return Z
        # Keying on (kind, value) keeps jump indices and operand addresses
        # that happen to be numerically equal from sharing a letter.
        key = (kind, op)
        if key not in token:
            name = chr(cur_token)
            token[key] = name
            rev_token[name] = op
            cur_token += 1
        return token[key]

    tk1, tk2 = get_token(code["op1"]), get_token(code['op2'])
    if code["jmp_type"] != "unc":
        return (tk1, tk2)
    return (tk1, tk2, get_token(code['jmp'], "jmp"))

# Walk the listing and print one line per recognised instruction, or the raw
# subleq text when no pattern matches at the current position.
# NOTE(review): a window is taken from consecutive entries of `dis`; nothing
# checks that their word indices are adjacent - confirm this is intended.
now = 0
while now < len(dis):
    matched = False
    # Byte address of the instruction at the current position.
    addr = (0x223 + (dis[now][0] << 1)) & 0xffff
    for ins in inst.instruction: # traverse all instructions
        sz = ins.sz
        if now + sz > len(dis):
            continue  # not enough entries left for this pattern

        # Letters are assigned per attempt, so reset the token state.
        token.clear()
        rev_token.clear()
        cur_token = ord('a')
        # `range` works on Python 2 and 3; the decoded record is not bound
        # to the module-level name `code`.
        patt = [trans(json.loads(dis[now + idx][1])) for idx in range(sz)]

        if ins.check(patt):
            print("{:#x}: {}".format(addr, ins.get_inst(rev_token)))
            matched = True
            now += sz
            break

    if not matched:
        print("{:#x}: {}".format(addr, json.loads(dis[now][1])["asm"]))
        now += 1

## inst.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Token names used to write the instruction patterns below.
a, b, c = "a", "b", "c"
Z = "0x223"

class Inst(object):
    """An instruction described by a fixed pattern of subleq token tuples."""

    def __init__(self, name, patt, sz):
        """Remember the description of one instruction.

        name: instruction name
        patt: subleq pattern (an array)
        sz: number of subleq instructions in the pattern
        """
        self.name, self.patt, self.sz = name, patt, sz

    def check(self, input_patt):
        """Return whether ``input_patt`` (an array) equals this pattern."""
        return input_patt == self.patt

class Jmp(Inst):
    """Pattern of one subleq instruction, displayed as "JMP <address>"."""

    def __init__(self, patt):
        super(Jmp, self).__init__("JMP", patt, 1)

    def get_inst(self, rev_token):
        """Format the jump; token "a" holds the target as a word index."""
        # Word index -> byte address (base 0x223, two bytes per word).
        target = (0x223 + (rev_token["a"] << 1)) & 0xffff
        return "JMP {:#x}".format(target)

class Move(Inst):
    """Pattern of four subleq instructions, displayed as "MOVE [dst], [src]"."""

    def __init__(self, patt):
        super(Move, self).__init__("MOVE", patt, 4)

    def get_inst(self, rev_token):
        """Format the instruction: token "a" is the destination, "b" the source."""
        return "MOVE [{:#x}], [{:#x}]".format(rev_token["a"], rev_token["b"])

class CLR(Inst):
    """Pattern of four subleq instructions, displayed as "CLR [dst]"."""

    def __init__(self, patt):
        super(CLR, self).__init__("CLR", patt, 4)

    def get_inst(self, rev_token):
        """Format the instruction: token "a" is the destination."""
        return "CLR [{:#x}]".format(rev_token["a"])

class Add(Inst):
    """Pattern of three subleq instructions, displayed as "ADD [dst], [src]"."""

    def __init__(self, patt):
        super(Add, self).__init__("ADD", patt, 3)

    def get_inst(self, rev_token):
        """Format the instruction: token "b" is the destination, "a" the source."""
        return "ADD [{:#x}], [{:#x}]".format(rev_token["b"], rev_token["a"])

# Known instruction patterns, in the order in which they are listed here.
instruction = [
    Move([(a, a), (b, Z), (Z, a), (Z, Z)]),
    # disabled: CLR([(a, a), (Z, Z), (Z, a), (Z, Z)])
    Add([(a, Z), (Z, b), (Z, Z)]),
    Jmp([(Z, Z, a)]),
]
	#!/usr/bin/env python
	# -- coding: utf-8 --

	from pwn import *
	from collections import OrderedDict
	import Queue
	import json
	import inst
	import sys

	# dump1: memory image of the subleq vm
	data = ""
	with open("./dump1", "rb") as f:
	    # Indentation restored: the read belongs to the with-block.
	    data = f.read()
	# Everything below assumes an image of exactly 0x8400 bytes.
	assert len(data) == 0x8400

	# Work list of instruction indices that still have to be decoded.
	q = Queue.Queue()
	# Decoded instructions, keyed by instruction index.
	code = {}

	def Word(addr):
	    """Return the unsigned 16-bit little-endian word at byte offset ``addr``.

	    The bytes are taken from the module-level ``data`` image.  The value is
	    decoded with an explicit ``<H`` struct format rather than pwntools'
	    ``u16`` so the result cannot change with pwntools' global ``context``.

	    Raises struct.error when fewer than two bytes are available at ``addr``.
	    """
	    import struct  # local import: the file's import block stays untouched

	    # Slicing (rather than unpack_from) keeps short reads an error, exactly
	    # like decoding a truncated slice with u16 did.
	    return struct.unpack("<H", data[addr:addr + 2])[0]

	def add_next_pos(addr):
	    """Queue instruction index ``addr`` for decoding unless already decoded.

	    An index counts as decoded when it is a key of the module-level ``code``
	    dict; any other index is appended to the work queue ``q``.
	    """
	    if addr in code:
	        return
	    q.put(addr)

	def subleq(now, f1, f2, f3):
	    """Decode one subleq instruction and record it in ``code``.

	    now: word index of the instruction (masked to 16 bits).
	    f1, f2: word indices of the two operands.
	    f3: word index of the branch target; 0 means "no branch".  The caller
	        reads it from the instruction's third word, so it is used directly
	        instead of being fetched from memory again.

	    ``code[now]`` receives a JSON string with the keys ``op1``/``op2``
	    (operand byte addresses), ``jmp_type`` ("unc", "con" or "next"), ``asm``
	    (display text) and, for branches, ``jmp``.  Every successor index is
	    passed to add_next_pos().  All addresses in ``asm`` are byte addresses.
	    """
	    def byte_addr(index):
	        # Word index -> byte address in the dump, with 16-bit wrap-around.
	        return (0x223 + ((index << 1) & 0xffff)) & 0xffff

	    now &= 0xffff
	    idx1 = byte_addr(f1)
	    idx2 = byte_addr(f2)
	    fall_through = now + 3  # an instruction occupies three words

	    record = {"op1": idx1, "op2": idx2}
	    asm = "subleq [{:#x}] [{:#x}] [{:#x}]".format(idx1, idx2, byte_addr(f3))

	    if not f3:
	        record['jmp_type'] = "next"  # no branch: continue with the next one
	        successors = [fall_through]
	    elif idx1 == idx2:
	        # Same cell as both operands: treated as an unconditional jump.
	        record['jmp_type'] = "unc"
	        record['jmp'] = f3
	        asm += "; JMP {:#x}".format(byte_addr(f3))
	        successors = [f3]
	    else:
	        record['jmp_type'] = "con"  # conditional
	        record['jmp'] = (f3, fall_through)  # (taken, not taken)
	        asm += "; if [{:#x}] <= 0; JMP {:#x}; else {:#x}".format(
	            idx2, byte_addr(f3), byte_addr(fall_through))
	        successors = [f3, fall_through]

	    record['asm'] = asm
	    # Store before queueing so a jump to itself is not queued again.
	    code[now] = json.dumps(record)
	    for nxt in successors:
	        add_next_pos(nxt)

	# starting point of the disassembler: a hex word index on the command line
	if len(sys.argv) < 2:
	    sys.exit("usage: {} <start index (hex)>".format(sys.argv[0]))
	start = int(sys.argv[1], 16)
	q.put(start)

	# Work-list traversal: decode every instruction reachable from `start`.
	while not q.empty():
	    now = q.get()
	    if (now & 0xffff) in code:
	        # The same index can be queued several times before it is decoded;
	        # decoding it again would only rebuild the identical record.
	        continue
	    cur = (now << 1) & 0xffff
	    # The three consecutive words of the instruction, read from byte
	    # address 0x223 + 2 * index onwards.
	    f1 = Word((0x223 + cur) & 0xffff)
	    f2 = Word((0x223 + cur + 2) & 0xffff)
	    f3 = Word((0x223 + cur + 4) & 0xffff)
	    subleq(now, f1, f2, f3)

	# Decoded instructions ordered by word index.
	od = OrderedDict(sorted(code.items()))
	black = [0xff, 0x102, 0x105] # black list of the disassembling address
	# The loop variable is deliberately not called `code`: that name is the dict
	# of decoded instructions and must not be rebound to one of its values.
	# .items() (instead of .iteritems()) iterates the same pairs on Python 2 and 3.
	dis = [(pos, entry) for pos, entry in od.items() if pos not in black]

	# token: (kind, value) -> letter; rev_token: letter -> value.
	token, rev_token = dict(), dict()
	# Code point of the next letter to hand out.  Starts at 'a' so trans() also
	# works before any caller has reset the state.
	cur_token = ord('a')
	Z = '0x223'
	def trans(code):
	    """Return the symbolic token tuple for one decoded subleq record.

	    code: dict with the keys ``op1``, ``op2`` and ``jmp_type``, plus ``jmp``
	        when ``jmp_type`` is "unc".

	    Operand address 0x223 always becomes ``Z``; every other distinct value
	    gets the next unused letter ('a', 'b', ...) and is remembered in the
	    module-level ``token``/``rev_token`` dicts.  Returns ``(tk1, tk2)``, or
	    ``(tk1, tk2, tk3)`` for an unconditional jump, where tk3 names the target.
	    """
	    def get_token(op, kind="mem"):
	        global cur_token
	        # Only an operand address can be Z.  A jump target is a word index,
	        # so the number 0x223 means something different there.
	        if kind == "mem" and op == 0x223:
	            return Z
	        # Keying on (kind, value) keeps jump indices and operand addresses
	        # that happen to be numerically equal from sharing a letter.
	        key = (kind, op)
	        if key not in token:
	            name = chr(cur_token)
	            token[key] = name
	            rev_token[name] = op
	            cur_token += 1
	        return token[key]

	    tk1, tk2 = get_token(code["op1"]), get_token(code['op2'])
	    if code["jmp_type"] != "unc":
	        return (tk1, tk2)
	    return (tk1, tk2, get_token(code['jmp'], "jmp"))

	# Walk the listing and print one line per recognised instruction, or the raw
	# subleq text when no pattern matches at the current position.
	# NOTE(review): a window is taken from consecutive entries of `dis`; nothing
	# checks that their word indices are adjacent - confirm this is intended.
	now = 0
	while now < len(dis):
	    matched = False
	    # Byte address of the instruction at the current position.
	    addr = (0x223 + (dis[now][0] << 1)) & 0xffff
	    for ins in inst.instruction: # traverse all instructions
	        sz = ins.sz
	        if now + sz > len(dis):
	            continue  # not enough entries left for this pattern

	        # Letters are assigned per attempt, so reset the token state.
	        token.clear()
	        rev_token.clear()
	        cur_token = ord('a')
	        # `range` works on Python 2 and 3; the decoded record is not bound
	        # to the module-level name `code`.
	        patt = [trans(json.loads(dis[now + idx][1])) for idx in range(sz)]

	        if ins.check(patt):
	            print("{:#x}: {}".format(addr, ins.get_inst(rev_token)))
	            matched = True
	            now += sz
	            break

	    if not matched:
	        print("{:#x}: {}".format(addr, json.loads(dis[now][1])["asm"]))
	        now += 1
	#!/usr/bin/env python
	# -- coding: utf-8 --

	# Token names used to write the instruction patterns below.
	a, b, c = "a", "b", "c"
	Z = "0x223"

	class Inst(object):
	    """An instruction described by a fixed pattern of subleq token tuples."""

	    def __init__(self, name, patt, sz):
	        """Remember the description of one instruction.

	        name: instruction name
	        patt: subleq pattern (an array)
	        sz: number of subleq instructions in the pattern
	        """
	        self.name = name
	        self.patt = patt
	        self.sz = sz

	    def check(self, input_patt):
	        """Return whether ``input_patt`` (an array) equals this pattern."""
	        return input_patt == self.patt

	class Jmp(Inst):
	    """Pattern of one subleq instruction, displayed as "JMP <address>"."""

	    def __init__(self, patt):
	        super(Jmp, self).__init__("JMP", patt, 1)

	    def get_inst(self, rev_token):
	        """Format the jump; token "a" holds the target as a word index."""
	        jmp_addr = rev_token["a"]
	        # Word index -> byte address (base 0x223, two bytes per word).
	        return "JMP {:#x}".format((0x223+(jmp_addr<<1))&0xffff)

	class Move(Inst):
	    """Pattern of four subleq instructions, displayed as "MOVE [dst], [src]"."""

	    def __init__(self, patt):
	        super(Move, self).__init__("MOVE", patt, 4)

	    def get_inst(self, rev_token):
	        """Format the instruction: token "a" is the destination, "b" the source."""
	        dst = rev_token["a"]
	        src = rev_token["b"]
	        return "MOVE [{:#x}], [{:#x}]".format(dst, src)

	class CLR(Inst):
	    """Pattern of four subleq instructions, displayed as "CLR [dst]"."""

	    def __init__(self, patt):
	        super(CLR, self).__init__("CLR", patt, 4)

	    def get_inst(self, rev_token):
	        """Format the instruction: token "a" is the destination."""
	        dst = rev_token["a"]
	        return "CLR [{:#x}]".format(dst)

	class Add(Inst):
	    """Pattern of three subleq instructions, displayed as "ADD [dst], [src]"."""

	    def __init__(self, patt):
	        super(Add, self).__init__("ADD", patt, 3)

	    def get_inst(self, rev_token):
	        """Format the instruction: token "b" is the destination, "a" the source."""
	        dst = rev_token["b"]
	        src = rev_token["a"]
	        return "ADD [{:#x}], [{:#x}]".format(dst, src)

	# Known instruction patterns, in the order in which they are listed here.
	instruction = [
	    Move([(a, a), (b, Z), (Z, a), (Z, Z)]),
	    # disabled: CLR([(a, a), (Z, Z), (Z, a), (Z, Z)])
	    Add([(a, Z), (Z, b), (Z, Z)]),
	    Jmp([(Z, Z, a)]),
	]