Last active
July 20, 2023 08:51
-
-
Save laomaiweng/3f9f88758a92d410f539c584f179b44a to your computer and use it in GitHub Desktop.
basic x86-64 emulator using unicorn
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from dataclasses import dataclass | |
import os | |
import re | |
import sys | |
from intervaltree import Interval, IntervalTree | |
from pygments import highlight | |
from pygments.formatters import TerminalFormatter | |
from pwnlib.asm import disasm | |
from pwnlib.lexer import PwntoolsLexer | |
from pwnlib.util.fiddling import hexdump | |
from unicorn import * | |
from unicorn.x86_const import * | |
# Grammar of a --map-mem spec: <addr>[+<size>][:<hex>|@<file>]
#   group 1: addr  (decimal, or 0x-prefixed hexadecimal)
#   group 2: size  (same numeric syntax, optional)
#   group 3: inline data as an even number of hex digits (optional)
#   group 4: path of a file providing the data (optional)
# A 0x-prefixed number has to accept the digits a-f as well: matching only
# [0-9]+ after the prefix would reject addresses such as 0xdead0000.
MAPMEM_RE = re.compile(
    r'(0[xX][0-9a-fA-F]+|[0-9]+)'
    r'(?:\+(0[xX][0-9a-fA-F]+|[0-9]+))?'
    r'(?::((?:[0-9a-fA-F][0-9a-fA-F])+)|@(.+))?'
)
# Granularity to which memory mappings are rounded.
PAGE_SIZE = 0x1000

# Unicorn CPU mode flag for each supported bitness.
BITNESS = {
    64: UC_MODE_64,
    32: UC_MODE_32,
}

# pwntools architecture name for each supported bitness.
PWN_ARCH = {
    64: 'amd64',
    32: 'i386',
}

# Registers that get a command-line option for their initial value.
USER_REGISTERS = {
    'rax': UC_X86_REG_RAX,
    'rbx': UC_X86_REG_RBX,
    'rcx': UC_X86_REG_RCX,
    'rdx': UC_X86_REG_RDX,
    'rbp': UC_X86_REG_RBP,
    'rsp': UC_X86_REG_RSP,
}

# Registers shown in the final dump: the user registers first, then RIP and
# RFLAGS (insertion order is the display order).
ALL_REGISTERS = {
    **USER_REGISTERS,
    'rip': UC_X86_REG_RIP,
    'rflags': UC_X86_REG_RFLAGS,
}
def base0int(x):
    """Parse an integer literal, inferring the base from its prefix.

    Thin wrapper around ``int(x, base=0)``, so ``0x``, ``0o`` and ``0b``
    prefixes are honoured and unprefixed input is read as decimal.

    Defined with ``def`` rather than as a lambda so the callable has a real
    ``__name__``: argparse quotes that name when a ``type=`` conversion
    fails, and "invalid <lambda> value" tells the user nothing.

    Raises:
        ValueError: if *x* is not a valid integer literal.
    """
    return int(x, base=0)
@dataclass | |
class MapMem: | |
addr: int | |
size: int | None | |
data: bytes | |
@classmethod | |
def fromstr(cls, s): | |
m = MAPMEM_RE.fullmatch(s) | |
if m is None: | |
raise RuntimeError(f'Syntax error in --map-mem spec: {s}') | |
print(f'[MapMem::fromstr] addr={m.group(1)} size={m.group(2)} hex={m.group(3)} file={m.group(4)}') | |
# addr | |
addr = int(m.group(1), base=0) | |
# size | |
size = m.group(2) | |
if size is not None: | |
size = int(size, base=0) | |
# data | |
data = b'' | |
hex = m.group(3) | |
if hex is not None: | |
data = bytes.fromhex(hex) | |
fname = m.group(4) | |
if fname is not None: | |
with open(fname, 'rb') as fd: | |
data = fd.read() | |
# trunc/pad data | |
if size is None: | |
size = len(data) | |
elif size < len(data): | |
data = data[:size] | |
elif len(data) < size: | |
padlen = size - len(data) | |
data = data + b'\x00'*padlen | |
assert size == len(data) | |
print(f'[MapMem::fromstr] MapMem(addr={addr:#x}, size={size:#x}, data=\'{data.hex()}\')') | |
return MapMem(addr, size, data) | |
@dataclass
class DumpMem:
    """A memory range to hexdump once emulation has finished."""

    addr: int  # start address of the range
    size: int  # number of bytes to dump

    @classmethod
    def fromstr(cls, s):
        """Build an instance from an ``<addr>+<size>`` string.

        Every '+'-separated field is converted with base0int and the results
        are handed positionally to the constructor.
        """
        fields = [base0int(part) for part in s.split('+')]
        return cls(*fields)
def align_down(n, a):
    """Round *n* down to a multiple of *a*.

    Works by clearing the low bits of *n*, so the result is only a true
    multiple of *a* when *a* is a power of two.
    """
    low_bits = a - 1
    return n & ~low_bits
def align_up(n, a):
    """Round *n* up to a multiple of *a*.

    Adds ``a - 1`` and then clears the low bits, so the result is only a true
    multiple of *a* when *a* is a power of two.
    """
    low_bits = a - 1
    bumped = n + low_bits
    return bumped & ~low_bits
def mem_interval(addr, size):
    """Return the page-aligned Interval that covers ``[addr, addr + size)``.

    The interval begins at the page containing *addr* and its length is
    rounded up to a whole number of pages.
    """
    start = align_down(addr, PAGE_SIZE)
    # Length measured from the page start, so the offset of addr inside its
    # page is included before rounding up.
    length = align_up((addr - start) + size, PAGE_SIZE)
    end = start + length
    return Interval(start, end)
# lifted from pwnlib.commandline.disasm
def asmdump(code, entry, arch):
    """Print a disassembly listing of *code* as if loaded at *entry*.

    disasm is run three times, each time with two of its three output columns
    (offset, byte, instructions) switched off, so the columns can be printed
    side by side with only the instruction text passed through the
    highlighter.
    """
    offset_col = disasm(code, vma=entry, arch=arch, instructions=False, byte=False)
    byte_col = disasm(code, vma=entry, arch=arch, instructions=False, offset=False)
    instr_col = disasm(code, vma=entry, arch=arch, byte=False, offset=False)
    instr_col = highlight(instr_col, PwntoolsLexer(), TerminalFormatter())

    rows = zip(
        offset_col.splitlines(),
        byte_col.splitlines(),
        instr_col.splitlines(),
    )
    for row in rows:
        print(*row)
def main(args):
    """Run the emulator as described by the parsed command-line arguments.

    Steps: obtain the code bytes (from the arguments or stdin), print them
    as a hexdump and a disassembly, map and initialise memory and registers
    in a fresh Unicorn instance, emulate from the entry point to the end of
    the code, then print the registers and every requested memory range.

    Raises:
        RuntimeError: if raw input format is requested for code given on the
            command line.
    """
    # grab/decode code to emulate
    has_inline_code = len(args.code) > 0
    if has_inline_code:
        # code given as arguments: hex is both the default and the only option
        ifmt = args.input_format or 'hex'
        if ifmt != 'hex':
            raise RuntimeError('Cannot take raw bytes through arguments, use hex input format or stdin.')
        code = bytes.fromhex(''.join(args.code))
    else:
        # code on stdin: default to hex when typed at a terminal, raw otherwise
        ifmt = args.input_format or ('hex' if os.isatty(0) else 'raw')
        code = bytes.fromhex(sys.stdin.read()) if ifmt == 'hex' else sys.stdin.buffer.read()

    # dump code
    print('Code:')
    print(hexdump(code, begin=args.entry))
    asmdump(code, args.entry, PWN_ARCH[args.bitness])
    print()

    # collect the page-aligned ranges of the code and of every --map-mem spec
    regions = IntervalTree()
    regions.add(mem_interval(args.entry, len(code)))
    for spec in args.map_mem:
        regions.add(mem_interval(spec.addr, spec.size))
    # merge overlapping intervals (Unicorn chokes when multiple mappings overlap)
    regions.merge_overlaps()

    # create the emulator and apply the register values the user supplied
    mu = Uc(UC_ARCH_X86, BITNESS[args.bitness])
    for reg_name, reg_id in USER_REGISTERS.items():
        initial = getattr(args, reg_name)
        if initial is None:
            continue
        mu.reg_write(reg_id, initial)

    # map memory, one mapping per merged interval
    for region in regions:
        mu.mem_map(region.begin, region.end - region.begin)

    # fill the user-specified ranges first, then write the code
    for spec in args.map_mem:
        mu.mem_write(spec.addr, spec.data)
    mu.mem_write(args.entry, code)

    # emulate code in infinite time & unlimited instructions
    print('Emulating...')
    try:
        mu.emu_start(args.entry, args.entry + len(code))
    except UcError as e:
        print(f'Emulation crashed: {e}')
        print(' (RIP below may be inaccurate)')
    else:
        print('Emulation done.')
    finally:
        # The dumps live in `finally` so they are printed however emulation
        # ended, including on exceptions that are not caught above.
        print()
        for reg, rnum in ALL_REGISTERS.items():
            rval = mu.reg_read(rnum)
            print(f'>>> {reg.upper()} = {rval:#x}')
        # dump memory
        for mem in args.dump_mem:
            data = mu.mem_read(mem.addr, mem.size)
            print()
            print(f'>>> @{mem.addr:#x} +{mem.size:#x}')
            print(hexdump(data, begin=mem.addr))
if __name__ == '__main__':
    # argparse is only needed when the file is run as a script.
    import argparse

    parser = argparse.ArgumentParser('x86emu')

    # one --<register> option per user-settable register
    for reg in USER_REGISTERS:
        parser.add_argument(
            f'--{reg}',
            type=base0int,
            help=f'initial value for {reg.upper()}',
        )

    parser.add_argument(
        '-e', '--entry',
        default=0,
        type=base0int,
        help='entry point',
    )
    parser.add_argument(
        '-b', '--bitness',
        default=64,
        type=int,
        choices=[64, 32],
        help='CPU mode (default: 64)',
    )
    parser.add_argument(
        '-i', '--input-format',
        choices=['hex', 'raw'],
        help='code format: hex, raw (for stdin only)',
    )
    # -m and -d may be repeated; each occurrence is parsed and appended
    parser.add_argument(
        '-m', '--map-mem',
        action='append',
        default=[],
        type=MapMem.fromstr,
        help='map specified memory range (format: <addr>[+<size>][:<hex>|@<file>]) at start of emulation',
    )
    parser.add_argument(
        '-d', '--dump-mem',
        action='append',
        default=[],
        type=DumpMem.fromstr,
        help='dump specified memory range (format: <addr>+<size>) at end of emulation',
    )
    # everything left over on the command line is the code to emulate
    parser.add_argument(
        'code',
        nargs=argparse.REMAINDER,
        help='instructions to emulate (read from stdin if none)',
    )

    cli_args = parser.parse_args()
    main(cli_args)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment