# laomaiweng/x86emu.py

## x86emu.py
#!/usr/bin/env python3

from dataclasses import dataclass
import os
import re
import sys

from intervaltree import Interval, IntervalTree
from pygments import highlight
from pygments.formatters import TerminalFormatter
from pwnlib.asm import disasm
from pwnlib.lexer import PwntoolsLexer
from pwnlib.util.fiddling import hexdump
from unicorn import *
from unicorn.x86_const import *

# Grammar of a --map-mem spec: <addr>[+<size>][:<hex>|@<file>]
#   group 1: addr  (decimal, or 0x-prefixed hexadecimal)
#   group 2: size  (same syntax as addr; optional)
#   group 3: hex   (an even number of hex digits; optional)
#   group 4: file  (everything after '@'; optional, alternative to hex)
# The 0x form has to accept the digits a-f too: with only [0-9] after the
# prefix, a spec such as 0xdead0000 is rejected although int(..., base=0)
# would parse it.
MAPMEM_RE = re.compile(
    r'(0[xX][0-9a-fA-F]+|[0-9]+)'
    r'(?:\+(0[xX][0-9a-fA-F]+|[0-9]+))?'
    r'(?::((?:[0-9a-fA-F][0-9a-fA-F])+)|@(.+))?'
)
# Alignment applied by mem_interval() to every mapped range.
PAGE_SIZE = 0x1000

# Unicorn CPU mode and pwntools architecture name, keyed by bitness.
BITNESS = {
    64: UC_MODE_64,
    32: UC_MODE_32,
}
PWN_ARCH = {
    64: 'amd64',
    32: 'i386',
}

# Registers that get a --<name> command-line option for their initial value.
# NOTE(review): these are 64-bit register IDs even when --bitness 32 is
# selected -- confirm Unicorn treats them as intended in 32-bit mode.
USER_REGISTERS = {
    'rax': UC_X86_REG_RAX,
    'rbx': UC_X86_REG_RBX,
    'rcx': UC_X86_REG_RCX,
    'rdx': UC_X86_REG_RDX,
    'rbp': UC_X86_REG_RBP,
    'rsp': UC_X86_REG_RSP,
}

# Registers dumped after emulation: the user registers, then RIP and RFLAGS.
ALL_REGISTERS = {
    **USER_REGISTERS,
    'rip': UC_X86_REG_RIP,
    'rflags': UC_X86_REG_RFLAGS,
}

def base0int(x):
    """Parse the string *x* as an integer, inferring the base from its prefix.

    Thin wrapper around ``int(x, base=0)``, so ``0x``/``0o``/``0b`` prefixes
    are honoured and anything int() rejects raises ValueError.

    Defined with ``def`` rather than as a lambda so that the function has a
    real ``__name__``; argparse uses that name in its "invalid ... value"
    error message when this is passed as ``type=``.
    """
    return int(x, base=0)

@dataclass
class MapMem:
    addr: int
    size: int | None
    data: bytes

    @classmethod
    def fromstr(cls, s):
        m = MAPMEM_RE.fullmatch(s)
        if m is None:
            raise RuntimeError(f'Syntax error in --map-mem spec: {s}')
        print(f'[MapMem::fromstr] addr={m.group(1)} size={m.group(2)} hex={m.group(3)} file={m.group(4)}')

        # addr
        addr = int(m.group(1), base=0)

        # size
        size = m.group(2)
        if size is not None:
            size = int(size, base=0)

        # data
        data = b''
        hex = m.group(3)
        if hex is not None:
            data = bytes.fromhex(hex)
        fname = m.group(4)
        if fname is not None:
            with open(fname, 'rb') as fd:
                data = fd.read()

        # trunc/pad data
        if size is None:
            size = len(data)
        elif size < len(data):
            data = data[:size]
        elif len(data) < size:
            padlen = size - len(data)
            data = data + b'\x00'*padlen
        assert size == len(data)

        print(f'[MapMem::fromstr]   MapMem(addr={addr:#x}, size={size:#x}, data=\'{data.hex()}\')')
        return MapMem(addr, size, data)

@dataclass
class DumpMem:
    """A memory range to hex-dump once emulation has finished."""
    # start address of the range
    addr: int
    # number of bytes to dump
    size: int

    @classmethod
    def fromstr(cls, s):
        """Build an instance from an ``<addr>+<size>`` spec.

        Every '+'-separated field is converted with base0int and the results
        are passed positionally to the constructor, so a spec that does not
        have exactly two fields fails in the constructor call.
        """
        fields = [base0int(part) for part in s.split('+')]
        return cls(*fields)

def align_down(n, a):
    """Clear the low bits of *n* selected by *a*: round down to a multiple of *a*.

    The result is a true multiple of *a* only when *a* is a power of two.
    """
    # In two's complement -a == ~(a - 1), so this is the usual alignment mask.
    return n & -a

def align_up(n, a):
    """Round *n* up to a multiple of *a* by adding ``a - 1`` and masking.

    The result is a true multiple of *a* only when *a* is a power of two.
    """
    bumped = n + a - 1
    # In two's complement -a == ~(a - 1), so this is the usual alignment mask.
    return bumped & -a

def mem_interval(addr, size):
    """Return the page-aligned Interval covering ``[addr, addr + size)``.

    The interval begins at the page containing *addr* and spans a whole
    number of pages (PAGE_SIZE granularity).

    An empty range (*size* == 0, e.g. a ``--map-mem <addr>`` spec with neither
    size nor data) still yields one page: a zero-length Interval is "null" and
    IntervalTree.add() rejects it with ValueError.
    """
    page = align_down(addr, PAGE_SIZE)
    span = max(align_up(addr - page + size, PAGE_SIZE), PAGE_SIZE)
    return Interval(page, page + span)

# lifted from pwnlib.commandline.disasm
def asmdump(code, entry, arch):
    """Print a disassembly listing of *code*, one instruction per line.

    disasm() is called three times to obtain the offset, raw-byte and
    instruction columns separately; only the instruction column is passed
    through the pygments highlighter. The columns are then printed side by
    side, separated by single spaces.

    Args:
        code: machine code bytes to disassemble.
        entry: address of the first byte (passed to disasm as ``vma``).
        arch: architecture name passed to disasm as ``arch``.
    """
    common = dict(vma=entry, arch=arch)
    offset_col = disasm(code, instructions=False, byte=False, **common)
    byte_col = disasm(code, instructions=False, offset=False, **common)
    instr_col = disasm(code, byte=False, offset=False, **common)
    instr_col = highlight(instr_col, PwntoolsLexer(), TerminalFormatter())

    columns = map(str.splitlines, (offset_col, byte_col, instr_col))
    for row in zip(*columns):
        print(*row)

def _read_code(args):
    """Return the machine code to emulate as bytes, from argv or from stdin."""
    if args.code:
        # code given as arguments: only hex can be spelled there (default: hex)
        ifmt = args.input_format or 'hex'
        if ifmt != 'hex':
            raise RuntimeError('Cannot take raw bytes through arguments, use hex input format or stdin.')
        return bytes.fromhex(''.join(args.code))

    # code read from stdin: default to hex if input is from a terminal
    ifmt = args.input_format or ('hex' if os.isatty(0) else 'raw')
    if ifmt == 'hex':
        return bytes.fromhex(sys.stdin.read())
    return sys.stdin.buffer.read()


def _dump_state(mu, dump_mem):
    """Print every register in ALL_REGISTERS, then each requested memory range."""
    print()
    for reg, rnum in ALL_REGISTERS.items():
        print(f'>>> {reg.upper()} = {mu.reg_read(rnum):#x}')

    for mem in dump_mem:
        print()
        print(f'>>> @{mem.addr:#x} +{mem.size:#x}')
        try:
            data = mu.mem_read(mem.addr, mem.size)
        except UcError as e:
            # A bad range (e.g. not mapped) must not abort the remaining dumps
            # with a traceback: report it and carry on.
            print(f'Cannot read memory: {e}')
            continue
        print(hexdump(data, begin=mem.addr))


def main(args):
    """Emulate the given code and print the resulting registers and memory.

    Reads from *args*: ``code``, ``input_format``, ``entry``, ``bitness``,
    ``map_mem``, ``dump_mem`` and one attribute per key of USER_REGISTERS.

    Steps: decode the code, print it as hexdump and disassembly, map every
    page touched by the code or by a --map-mem range, initialise registers and
    memory, run from ``entry`` to the end of the code, then dump the state.
    The dump happens even if the emulation raises.

    Raises:
        RuntimeError: if code is passed as arguments with a non-hex format.
    """
    code = _read_code(args)

    # dump code
    print('Code:')
    print(hexdump(code, begin=args.entry))
    asmdump(code, args.entry, PWN_ARCH[args.bitness])
    print()

    # build an interval tree of all mapped memory
    memmap = IntervalTree()
    memmap.add(mem_interval(args.entry, len(code)))
    for mem in args.map_mem:
        memmap.add(mem_interval(mem.addr, mem.size))
    # merge overlapping intervals (Unicorn chokes when multiple mappings overlap)
    memmap.merge_overlaps()

    # initialize user registers (only those given on the command line)
    mu = Uc(UC_ARCH_X86, BITNESS[args.bitness])
    for reg, rnum in USER_REGISTERS.items():
        rval = getattr(args, reg)
        if rval is not None:
            mu.reg_write(rnum, rval)

    # map memory (using intervals from the interval tree)
    for iv in memmap:
        mu.mem_map(iv.begin, iv.end - iv.begin)

    # init memory
    for mem in args.map_mem:
        mu.mem_write(mem.addr, mem.data)

    # init code: written last, so it overwrites --map-mem data at the same addresses
    mu.mem_write(args.entry, code)

    # emulate code in infinite time & unlimited instructions
    print('Emulating...')
    try:
        mu.emu_start(args.entry, args.entry + len(code))
        print('Emulation done.')
    except UcError as e:
        print(f'Emulation crashed: {e}')
        print('  (RIP below may be inaccurate)')
    finally:
        _dump_state(mu, args.dump_mem)

if __name__ == '__main__':
    import argparse

    cli = argparse.ArgumentParser(prog='x86emu')

    # one optional --<reg> flag per user-settable register
    for regname in USER_REGISTERS:
        cli.add_argument(f'--{regname}', type=base0int, help=f'initial value for {regname.upper()}')

    cli.add_argument('-e', '--entry', default=0, type=base0int,
                     help='entry point')
    cli.add_argument('-b', '--bitness', default=64, type=int, choices=[64, 32],
                     help='CPU mode (default: 64)')
    cli.add_argument('-i', '--input-format', choices=['hex', 'raw'],
                     help='code format: hex, raw (for stdin only)')
    cli.add_argument('-m', '--map-mem', action='append', default=[], type=MapMem.fromstr,
                     help='map specified memory range (format: <addr>[+<size>][:<hex>|@<file>]) at start of emulation')
    cli.add_argument('-d', '--dump-mem', action='append', default=[], type=DumpMem.fromstr,
                     help='dump specified memory range (format: <addr>+<size>) at end of emulation')
    # everything left on the command line is the code to emulate
    cli.add_argument('code', nargs=argparse.REMAINDER,
                     help='instructions to emulate (read from stdin if none)')

    main(cli.parse_args())
	#!/usr/bin/env python3

	from dataclasses import dataclass
	import os
	import re
	import sys

	from intervaltree import Interval, IntervalTree
	from pygments import highlight
	from pygments.formatters import TerminalFormatter
	from pwnlib.asm import disasm
	from pwnlib.lexer import PwntoolsLexer
	from pwnlib.util.fiddling import hexdump
	from unicorn import *
	from unicorn.x86_const import *

	# Grammar of a --map-mem spec: <addr>[+<size>][:<hex>|@<file>]
	#   group 1: addr  (decimal, or 0x-prefixed hexadecimal)
	#   group 2: size  (same syntax as addr; optional)
	#   group 3: hex   (an even number of hex digits; optional)
	#   group 4: file  (everything after '@'; optional, alternative to hex)
	# Two fixes over the previous pattern: the '|' between the hex and file
	# forms is a real alternation again (it had been escaped into a literal
	# pipe), and the 0x form accepts the digits a-f so that 0xdead0000 parses.
	MAPMEM_RE = re.compile(
	    r'(0[xX][0-9a-fA-F]+|[0-9]+)'
	    r'(?:\+(0[xX][0-9a-fA-F]+|[0-9]+))?'
	    r'(?::((?:[0-9a-fA-F][0-9a-fA-F])+)|@(.+))?'
	)
	# Alignment applied by mem_interval() to every mapped range.
	PAGE_SIZE = 0x1000

	# Unicorn CPU mode and pwntools architecture name, keyed by bitness.
	BITNESS = {
	    64: UC_MODE_64,
	    32: UC_MODE_32,
	}
	PWN_ARCH = {
	    64: 'amd64',
	    32: 'i386',
	}

	# Registers that get a --<name> command-line option for their initial value.
	# NOTE(review): these are 64-bit register IDs even when --bitness 32 is
	# selected -- confirm Unicorn treats them as intended in 32-bit mode.
	USER_REGISTERS = {
	    'rax': UC_X86_REG_RAX,
	    'rbx': UC_X86_REG_RBX,
	    'rcx': UC_X86_REG_RCX,
	    'rdx': UC_X86_REG_RDX,
	    'rbp': UC_X86_REG_RBP,
	    'rsp': UC_X86_REG_RSP,
	}

	# Registers dumped after emulation: the user registers, then RIP and RFLAGS.
	ALL_REGISTERS = {
	    **USER_REGISTERS,
	    'rip': UC_X86_REG_RIP,
	    'rflags': UC_X86_REG_RFLAGS,
	}

	def base0int(x):
	    """Parse the string *x* as an integer, inferring the base from its prefix.

	    Thin wrapper around ``int(x, base=0)``, so ``0x``/``0o``/``0b`` prefixes
	    are honoured and anything int() rejects raises ValueError.

	    Defined with ``def`` rather than as a lambda so that the function has a
	    real ``__name__``; argparse uses that name in its "invalid ... value"
	    error message when this is passed as ``type=``.
	    """
	    return int(x, base=0)

	@dataclass
	class MapMem:
	addr: int
	size: int \| None
	data: bytes

	@classmethod
	def fromstr(cls, s):
	m = MAPMEM_RE.fullmatch(s)
	if m is None:
	raise RuntimeError(f'Syntax error in --map-mem spec: {s}')
	print(f'[MapMem::fromstr] addr={m.group(1)} size={m.group(2)} hex={m.group(3)} file={m.group(4)}')

	# addr
	addr = int(m.group(1), base=0)

	# size
	size = m.group(2)
	if size is not None:
	size = int(size, base=0)

	# data
	data = b''
	hex = m.group(3)
	if hex is not None:
	data = bytes.fromhex(hex)
	fname = m.group(4)
	if fname is not None:
	with open(fname, 'rb') as fd:
	data = fd.read()

	# trunc/pad data
	if size is None:
	size = len(data)
	elif size < len(data):
	data = data[:size]
	elif len(data) < size:
	padlen = size - len(data)
	data = data + b'\x00'*padlen
	assert size == len(data)

	print(f'[MapMem::fromstr] MapMem(addr={addr:#x}, size={size:#x}, data=\'{data.hex()}\')')
	return MapMem(addr, size, data)

	@dataclass
	class DumpMem:
	    """A memory range to hex-dump once emulation has finished."""
	    # start address of the range
	    addr: int
	    # number of bytes to dump
	    size: int

	    @classmethod
	    def fromstr(cls, s):
	        """Build an instance from an ``<addr>+<size>`` spec.

	        Every '+'-separated field is converted with base0int and the results
	        are passed positionally to the constructor, so a spec that does not
	        have exactly two fields fails in the constructor call.
	        """
	        return cls(*map(base0int, s.split('+')))

	def align_down(n, a):
	    """Clear the low bits of *n* selected by *a*: round down to a multiple of *a*.

	    The result is a true multiple of *a* only when *a* is a power of two.
	    """
	    return n & ~(a - 1)

	def align_up(n, a):
	    """Round *n* up to a multiple of *a* by adding ``a - 1`` and masking.

	    The result is a true multiple of *a* only when *a* is a power of two.
	    """
	    return (n + (a - 1)) & ~(a - 1)

	def mem_interval(addr, size):
	    """Return the page-aligned Interval covering ``[addr, addr + size)``.

	    The interval begins at the page containing *addr* and spans a whole
	    number of pages (PAGE_SIZE granularity).

	    An empty range (*size* == 0, e.g. a ``--map-mem <addr>`` spec with neither
	    size nor data) still yields one page: a zero-length Interval is "null" and
	    IntervalTree.add() rejects it with ValueError.
	    """
	    page = align_down(addr, PAGE_SIZE)
	    span = max(align_up(addr - page + size, PAGE_SIZE), PAGE_SIZE)
	    return Interval(page, page + span)

	# lifted from pwnlib.commandline.disasm
	def asmdump(code, entry, arch):
	    """Print a disassembly listing of *code*, one instruction per line.

	    disasm() is called three times to obtain the offset, raw-byte and
	    instruction columns separately; only the instruction column is passed
	    through the pygments highlighter. The columns are then printed side by
	    side, separated by single spaces.

	    Args:
	        code: machine code bytes to disassemble.
	        entry: address of the first byte (passed to disasm as ``vma``).
	        arch: architecture name passed to disasm as ``arch``.
	    """
	    # local names avoid shadowing the builtin `bytes`
	    offset_col = disasm(code, vma=entry, instructions=False, byte=False, arch=arch)
	    byte_col = disasm(code, vma=entry, instructions=False, offset=False, arch=arch)
	    instr_col = disasm(code, vma=entry, byte=False, offset=False, arch=arch)
	    instr_col = highlight(instr_col, PwntoolsLexer(), TerminalFormatter())

	    for o, b, i in zip(*map(str.splitlines, (offset_col, byte_col, instr_col))):
	        print(o, b, i)

	def _read_code(args):
	    """Return the machine code to emulate as bytes, from argv or from stdin."""
	    if args.code:
	        # code given as arguments: only hex can be spelled there (default: hex)
	        ifmt = args.input_format or 'hex'
	        if ifmt != 'hex':
	            raise RuntimeError('Cannot take raw bytes through arguments, use hex input format or stdin.')
	        return bytes.fromhex(''.join(args.code))

	    # code read from stdin: default to hex if input is from a terminal
	    ifmt = args.input_format or ('hex' if os.isatty(0) else 'raw')
	    if ifmt == 'hex':
	        return bytes.fromhex(sys.stdin.read())
	    return sys.stdin.buffer.read()


	def _dump_state(mu, dump_mem):
	    """Print every register in ALL_REGISTERS, then each requested memory range."""
	    print()
	    for reg, rnum in ALL_REGISTERS.items():
	        print(f'>>> {reg.upper()} = {mu.reg_read(rnum):#x}')

	    for mem in dump_mem:
	        print()
	        print(f'>>> @{mem.addr:#x} +{mem.size:#x}')
	        try:
	            data = mu.mem_read(mem.addr, mem.size)
	        except UcError as e:
	            # A bad range (e.g. not mapped) must not abort the remaining dumps
	            # with a traceback: report it and carry on.
	            print(f'Cannot read memory: {e}')
	            continue
	        print(hexdump(data, begin=mem.addr))


	def main(args):
	    """Emulate the given code and print the resulting registers and memory.

	    Reads from *args*: ``code``, ``input_format``, ``entry``, ``bitness``,
	    ``map_mem``, ``dump_mem`` and one attribute per key of USER_REGISTERS.

	    Steps: decode the code, print it as hexdump and disassembly, map every
	    page touched by the code or by a --map-mem range, initialise registers and
	    memory, run from ``entry`` to the end of the code, then dump the state.
	    The dump happens even if the emulation raises.

	    Raises:
	        RuntimeError: if code is passed as arguments with a non-hex format.
	    """
	    code = _read_code(args)

	    # dump code
	    print('Code:')
	    print(hexdump(code, begin=args.entry))
	    asmdump(code, args.entry, PWN_ARCH[args.bitness])
	    print()

	    # build an interval tree of all mapped memory
	    memmap = IntervalTree()
	    memmap.add(mem_interval(args.entry, len(code)))
	    for mem in args.map_mem:
	        memmap.add(mem_interval(mem.addr, mem.size))
	    # merge overlapping intervals (Unicorn chokes when multiple mappings overlap)
	    memmap.merge_overlaps()

	    # initialize user registers (only those given on the command line)
	    mu = Uc(UC_ARCH_X86, BITNESS[args.bitness])
	    for reg, rnum in USER_REGISTERS.items():
	        rval = getattr(args, reg)
	        if rval is not None:
	            mu.reg_write(rnum, rval)

	    # map memory (using intervals from the interval tree)
	    for iv in memmap:
	        mu.mem_map(iv.begin, iv.end - iv.begin)

	    # init memory
	    for mem in args.map_mem:
	        mu.mem_write(mem.addr, mem.data)

	    # init code: written last, so it overwrites --map-mem data at the same addresses
	    mu.mem_write(args.entry, code)

	    # emulate code in infinite time & unlimited instructions
	    print('Emulating...')
	    try:
	        mu.emu_start(args.entry, args.entry + len(code))
	        print('Emulation done.')
	    except UcError as e:
	        print(f'Emulation crashed: {e}')
	        print(' (RIP below may be inaccurate)')
	    finally:
	        _dump_state(mu, args.dump_mem)

	if __name__ == '__main__':
	    import argparse

	    parser = argparse.ArgumentParser('x86emu')

	    # one optional --<reg> flag per user-settable register
	    for reg in USER_REGISTERS:
	        parser.add_argument(f'--{reg}', type=base0int, help=f'initial value for {reg.upper()}')

	    parser.add_argument('-e', '--entry', default=0, type=base0int, help='entry point')
	    parser.add_argument('-b', '--bitness', default=64, type=int, choices=[64, 32], help='CPU mode (default: 64)')
	    parser.add_argument('-i', '--input-format', choices=['hex', 'raw'], help='code format: hex, raw (for stdin only)')
	    # The format text uses a plain '|': the previous '\|' was an invalid
	    # escape sequence that also showed a stray backslash in --help.
	    parser.add_argument('-m', '--map-mem', action='append', default=[], type=MapMem.fromstr, help='map specified memory range (format: <addr>[+<size>][:<hex>|@<file>]) at start of emulation')
	    parser.add_argument('-d', '--dump-mem', action='append', default=[], type=DumpMem.fromstr, help='dump specified memory range (format: <addr>+<size>) at end of emulation')
	    # everything left on the command line is the code to emulate
	    parser.add_argument('code', nargs=argparse.REMAINDER, help='instructions to emulate (read from stdin if none)')

	    main(parser.parse_args())