Skip to content

Instantly share code, notes, and snippets.

@laomaiweng
Last active July 20, 2023 08:51
Show Gist options
  • Save laomaiweng/3f9f88758a92d410f539c584f179b44a to your computer and use it in GitHub Desktop.
Save laomaiweng/3f9f88758a92d410f539c584f179b44a to your computer and use it in GitHub Desktop.
Basic x86 emulator (64-bit by default, 32-bit optional) built on the Unicorn engine.
#!/usr/bin/env python3
from dataclasses import dataclass
import os
import re
import sys
from intervaltree import Interval, IntervalTree
from pygments import highlight
from pygments.formatters import TerminalFormatter
from pwnlib.asm import disasm
from pwnlib.lexer import PwntoolsLexer
from pwnlib.util.fiddling import hexdump
from unicorn import *
from unicorn.x86_const import *
# Grammar of a --map-mem spec: <addr>[+<size>][:<hex>|@<file>]
#   group 1: addr   group 2: size   group 3: inline hex bytes   group 4: file name
# addr and size are either plain decimal or 0x-prefixed hexadecimal. The hex
# form must accept the digits a-f as well, otherwise perfectly valid addresses
# such as 0xdead0000 are reported as syntax errors.
_MAPMEM_INT = r'0[xX][0-9a-fA-F]+|[0-9]+'
MAPMEM_RE = re.compile(
    rf'({_MAPMEM_INT})'                      # addr
    rf'(?:\+({_MAPMEM_INT}))?'               # +size
    r'(?::((?:[0-9a-fA-F][0-9a-fA-F])+)'     # :hex (whole bytes only)
    r'|@(.+))?'                              # @file
)
# granularity used when rounding memory ranges for mapping
PAGE_SIZE = 0x1000

# --bitness value -> Unicorn CPU mode
BITNESS = {
    64: UC_MODE_64,
    32: UC_MODE_32,
}
# --bitness value -> pwntools architecture name (used for disassembly)
PWN_ARCH = {
    64: 'amd64',
    32: 'i386',
}

# registers whose initial value can be set from the command line
USER_REGISTERS = {
    'rax': UC_X86_REG_RAX,
    'rbx': UC_X86_REG_RBX,
    'rcx': UC_X86_REG_RCX,
    'rdx': UC_X86_REG_RDX,
    'rbp': UC_X86_REG_RBP,
    'rsp': UC_X86_REG_RSP,
}
# registers printed once emulation has ended
ALL_REGISTERS = {
    **USER_REGISTERS,
    'rip': UC_X86_REG_RIP,
    'rflags': UC_X86_REG_RFLAGS,
}
def base0int(x):
    """Parse *x* as an integer, inferring the base from its prefix.

    Uses int(x, base=0), so '0x10', '0o20', '0b10000' and '16' all yield 16.

    This is a named function rather than a lambda bound to a name because
    argparse builds its "invalid <type> value" error message from the type
    callable's __name__; a lambda would be reported as "<lambda>".

    Raises:
        ValueError: if *x* is not a valid integer literal.
    """
    return int(x, base=0)
@dataclass
class MapMem:
    """A memory region to map and initialise before emulation starts."""

    # start address of the region
    addr: int
    # length of the region in bytes (fromstr() always resolves this to an int)
    size: int | None
    # initial contents of the region
    data: bytes

    @classmethod
    def fromstr(cls, s):
        """Build an instance from a --map-mem spec.

        The spec has the form <addr>[+<size>][:<hex>|@<file>]. The contents
        come from the inline hex string or from the named file (read in binary
        mode); with neither, the contents are empty. If a size is given the
        contents are truncated or zero-padded to exactly that size; otherwise
        the size is the length of the contents.

        Two diagnostic lines are printed to stdout for every parsed spec.

        Raises:
            RuntimeError: if *s* does not match the spec grammar.
            ValueError: if addr/size cannot be converted by int(..., base=0).
            OSError: if the referenced file cannot be opened or read.
        """
        m = MAPMEM_RE.fullmatch(s)
        if m is None:
            raise RuntimeError(f'Syntax error in --map-mem spec: {s}')
        addr_text, size_text, hex_text, fname = (m.group(i) for i in range(1, 5))
        print(f'[MapMem::fromstr] addr={addr_text} size={size_text} hex={hex_text} file={fname}')

        addr = int(addr_text, base=0)
        size = None if size_text is None else int(size_text, base=0)

        # contents: inline hex, file, or nothing
        data = b''
        if hex_text is not None:
            data = bytes.fromhex(hex_text)
        if fname is not None:
            with open(fname, 'rb') as fd:
                data = fd.read()

        # reconcile size and contents: infer, truncate, or zero-pad
        if size is None:
            size = len(data)
        elif size < len(data):
            data = data[:size]
        else:
            data = data.ljust(size, b'\x00')
        assert size == len(data)

        print(f"[MapMem::fromstr] MapMem(addr={addr:#x}, size={size:#x}, data='{data.hex()}')")
        # construct through cls so that subclasses get instances of themselves
        return cls(addr, size, data)
@dataclass
class DumpMem:
    """A memory range to hex-dump once emulation has ended."""

    # start address of the range
    addr: int
    # number of bytes to dump
    size: int

    @classmethod
    def fromstr(cls, s):
        """Build an instance from a '<addr>+<size>' string.

        Every '+'-separated field is converted with int(..., base=0) and the
        results are passed positionally to the constructor, so a spec with the
        wrong number of fields fails with the constructor's TypeError and a
        non-numeric field fails with ValueError.
        """
        fields = [int(part, base=0) for part in s.split('+')]
        return cls(*fields)
def align_down(n, a):
    """Round *n* down to a multiple of *a*.

    Works by clearing the bits of *n* selected by the mask a-1, so the result
    is a true multiple of *a* only when *a* is a power of two.
    """
    low_bits = n & (a - 1)
    return n - low_bits
def align_up(n, a):
    """Round *n* up to a multiple of *a*.

    Adds a-1 and then clears the bits selected by the mask a-1, so the result
    is a true multiple of *a* only when *a* is a power of two.
    """
    mask = a - 1
    bumped = n + mask
    return bumped - (bumped & mask)
def mem_interval(addr, size):
    """Return the page-aligned Interval that covers *size* bytes at *addr*.

    The interval begins at the page containing *addr* and ends at the first
    page boundary at or after addr+size. At least one page is always covered,
    even when *size* is 0.
    """
    page = align_down(addr, PAGE_SIZE)
    # A zero-length range at a page-aligned address would give begin == end,
    # i.e. a null Interval, which IntervalTree.add() rejects with ValueError
    # (this happens with empty code or a --map-mem spec without size/data).
    # Treat such a range as touching one byte so its page still gets mapped.
    covered = max(addr - page + size, 1)
    return Interval(page, page + align_up(covered, PAGE_SIZE))
# lifted from pwnlib.commandline.disasm
def asmdump(code, entry, arch):
    """Print a disassembly listing of *code* as if loaded at address *entry*.

    disasm() is called three times with different column switches (offsets
    only, bytes only, instructions only); the instruction text is run through
    pygments for terminal highlighting, and the three outputs are then printed
    side by side, one line per row.
    """
    offset_text = disasm(code, vma=entry, instructions=False, byte=False, arch=arch)
    byte_text = disasm(code, vma=entry, instructions=False, offset=False, arch=arch)
    instr_text = disasm(code, vma=entry, byte=False, offset=False, arch=arch)
    instr_text = highlight(instr_text, PwntoolsLexer(), TerminalFormatter())

    rows = zip(
        offset_text.splitlines(),
        byte_text.splitlines(),
        instr_text.splitlines(),
    )
    for offset, raw, instr in rows:
        print(offset, raw, instr)
def _read_code(args):
    """Return the machine code to emulate, taken from argv or from stdin."""
    if args.code:
        # code on the command line can only be hex (which is also the default)
        fmt = args.input_format or 'hex'
        if fmt != 'hex':
            raise RuntimeError('Cannot take raw bytes through arguments, use hex input format or stdin.')
        return bytes.fromhex(''.join(args.code))

    # nothing on the command line: read stdin, defaulting to hex on a terminal
    fmt = args.input_format or ('hex' if os.isatty(0) else 'raw')
    if fmt == 'hex':
        return bytes.fromhex(sys.stdin.read())
    return sys.stdin.buffer.read()


def main(args):
    """Emulate the supplied code and print the resulting machine state.

    *args* is the parsed command line. It provides: code, input_format,
    entry, bitness, map_mem, dump_mem, and one attribute per USER_REGISTERS
    key holding that register's initial value (or None).

    Steps: decode and display the code, map and fill memory, set the initial
    registers, run from entry to entry+len(code), then dump ALL_REGISTERS and
    every requested memory range. The dumps are printed whether or not the
    emulation raised.
    """
    code = _read_code(args)
    entry = args.entry

    # show what is about to run
    print('Code:')
    print(hexdump(code, begin=entry))
    asmdump(code, entry, PWN_ARCH[args.bitness])
    print()

    # gather the page ranges needed by the code and by every --map-mem region,
    # then merge overlaps (Unicorn chokes when multiple mappings overlap)
    regions = IntervalTree()
    regions.add(mem_interval(entry, len(code)))
    for spec in args.map_mem:
        regions.add(mem_interval(spec.addr, spec.size))
    regions.merge_overlaps()

    # create the CPU and load the registers the user gave a value for
    emu = Uc(UC_ARCH_X86, BITNESS[args.bitness])
    for name, regid in USER_REGISTERS.items():
        initial = getattr(args, name)
        if initial is None:
            continue
        emu.reg_write(regid, initial)

    # map the merged ranges
    for region in regions:
        emu.mem_map(region.begin, region.end - region.begin)

    # fill memory: user regions first, the code last
    for spec in args.map_mem:
        emu.mem_write(spec.addr, spec.data)
    emu.mem_write(entry, code)

    # run with no time limit and no instruction-count limit
    print('Emulating...')
    try:
        emu.emu_start(entry, entry + len(code))
    except UcError as e:
        print(f'Emulation crashed: {e}')
        print(' (RIP below may be inaccurate)')
    else:
        print('Emulation done.')
    finally:
        # register dump
        print()
        for name, regid in ALL_REGISTERS.items():
            print(f'>>> {name.upper()} = {emu.reg_read(regid):#x}')
        # memory dumps
        for rng in args.dump_mem:
            contents = emu.mem_read(rng.addr, rng.size)
            print()
            print(f'>>> @{rng.addr:#x} +{rng.size:#x}')
            print(hexdump(contents, begin=rng.addr))
if __name__ == '__main__':
    import argparse

    cli = argparse.ArgumentParser('x86emu')

    # one --<register> option per user-settable register
    for regname in USER_REGISTERS:
        cli.add_argument(
            f'--{regname}',
            type=base0int,
            help=f'initial value for {regname.upper()}',
        )

    cli.add_argument(
        '-e', '--entry',
        default=0,
        type=base0int,
        help='entry point',
    )
    cli.add_argument(
        '-b', '--bitness',
        default=64,
        type=int,
        choices=[64, 32],
        help='CPU mode (default: 64)',
    )
    cli.add_argument(
        '-i', '--input-format',
        choices=['hex', 'raw'],
        help='code format: hex, raw (for stdin only)',
    )
    # repeatable options: each occurrence is parsed and appended to a list
    cli.add_argument(
        '-m', '--map-mem',
        action='append',
        default=[],
        type=MapMem.fromstr,
        help='map specified memory range (format: <addr>[+<size>][:<hex>|@<file>]) at start of emulation',
    )
    cli.add_argument(
        '-d', '--dump-mem',
        action='append',
        default=[],
        type=DumpMem.fromstr,
        help='dump specified memory range (format: <addr>+<size>) at end of emulation',
    )
    # everything left over is the code itself (hex strings)
    cli.add_argument(
        'code',
        nargs=argparse.REMAINDER,
        help='instructions to emulate (read from stdin if none)',
    )

    options = cli.parse_args()
    main(options)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment