Skip to content

Instantly share code, notes, and snippets.

@integeruser
Last active July 6, 2019 16:15
Show Gist options
  • Save integeruser/5509d0d0e533db0e4c2e488fe8b9f46c to your computer and use it in GitHub Desktop.
Save integeruser/5509d0d0e533db0e4c2e488fe8b9f46c to your computer and use it in GitHub Desktop.
Enhance disassembly of the function surrounding the pc of the selected frame
#!/usr/bin/env python3
import collections
import random
import re
import shutil
import gdb
# ANSI escape sequences keyed by colour name. 'reset' switches colouring
# off again. Insertion order is significant: find_paths() picks colours by
# position in this mapping.
colors = {
    name: '\u001b[%dm' % code
    for name, code in (
        ('red', 31),
        ('green', 32),
        ('yellow', 33),
        ('blue', 34),
        ('magenta', 35),
        ('cyan', 36),
        ('white', 37),
        ('reset', 0),
    )
}

# Terminal width in characters, sampled once when the script is loaded;
# falls back to 80 columns when the size cannot be determined.
columns = shutil.get_terminal_size(fallback=(80, 20)).columns
def colorize(text, color):
    """Wrap *text* in the escape sequence for *color* followed by a reset.

    *color* must be a key of the module-level ``colors`` mapping; an unknown
    name raises ``KeyError``. *text* is converted with ``str()``.
    """
    start, end = colors[color], colors['reset']
    return ''.join((start, str(text), end))
def decolorize(text):
    """Return *text* with every ANSI SGR colour sequence removed.

    Used to measure the visible width of an already coloured string.
    Any ``ESC [ <digits and semicolons> m`` sequence is stripped, which
    covers the 30-37 foreground codes and the reset in ``colors`` as well as
    bright (90-97) and compound (e.g. ``1;31``) codes, so the result is
    correct even for the bright-green sequence this script prints.
    """
    return re.sub(r'\x1b\[[0-9;]*m', '', text)
###################################################################################################
def parse_disassembly(disassembly):
    """Parse the lines of a disassembly listing into an ordered mapping.

    *disassembly* is an iterable of text lines. Lines that
    ``parse_instruction`` rejects (it raises ``AttributeError`` for them) are
    skipped. The result maps each instruction address to the tuple
    ``(curr, addr, off, instr, info)`` in listing order.
    """
    parsed = collections.OrderedDict()
    for raw_line in disassembly:
        try:
            is_current, address, offset, text, comment = parse_instruction(raw_line)
        except AttributeError:
            # Header, footer or blank line: not an instruction.
            continue
        parsed[address] = (is_current, address, offset, text, comment)
    return parsed
def parse_instruction(line):
    """Split one disassembly line into its components.

    Returns ``(curr, addr, off, instr, info)`` where *curr* is True when the
    line carries the ``=>`` marker, *addr* is the address as an int, *off* is
    the decimal ``<+N>`` offset as an int, *instr* is the instruction text
    and *info* is the trailing ``# ...`` comment or None.

    Raises ``AttributeError`` when the line does not look like an
    instruction (the regex does not match).
    """
    match = re.search(
        r'(?P<curr>=>)?[ +](?P<addr>0[xX][0-9a-fA-F]+) <\+(?P<off>\d+)>:\s+(?P<instr>.+?(?=\s+(?P<info>#\s+.+?$)|$))',
        line)
    # match is None for non-instruction lines, so .groups() raises the
    # AttributeError that callers rely on.
    marker, address, offset, text, comment = match.groups()
    return (
        marker is not None,
        int(address, 16),
        int(offset),
        text.strip(),
        comment.strip() if comment else None,
    )
###################################################################################################
# A jump or loop mnemonic followed directly by a literal hexadecimal target.
# Indirect jumps (e.g. through a register or memory operand) do not match.
_JUMP_RE = re.compile(r'\b(?:j[a-z]+|loop[a-z]*)\s+(?P<to_addr>0[xX][0-9a-fA-F]+)')


def find_jumps(instructions):
    """Map the address of every direct jump to its target address.

    *instructions* is the mapping produced by ``parse_disassembly``; only the
    address and instruction text of each entry are used. Instructions that
    are not direct jumps are ignored.

    The mnemonic is matched as a whole word (``jmp``, ``je``, ``jg``,
    ``jnz``, ``loop`` ...). The previous pattern used a character class where
    an alternation was intended and therefore missed every conditional jump
    whose mnemonic did not happen to end in one of a handful of letters.
    """
    jumps = {}
    for _, from_addr, _, instr, _ in instructions.values():
        match = _JUMP_RE.search(instr)
        if match:
            jumps[from_addr] = int(match.group('to_addr'), 16)
    return jumps
def _draw_corner(paths, addr, column, corner, color):
    """Draw *corner* at (*addr*, *column*) plus the horizontal rule to its right.

    Does nothing when *addr* has no row, i.e. the jump endpoint lies outside
    the disassembled function.
    """
    row = paths.get(addr)
    if row is None:
        return
    row[column] = (corner, color)
    for other in range(column + 1, len(row)):
        # Stop at a corner of the same kind that is already drawn there.
        if row[other][0] == corner:
            break
        row[other] = ('━', color)


def find_paths(instructions, jumps):
    """Compute the box-drawing gutter that visualises every jump.

    Returns a dict mapping each instruction address to a list with one
    ``(glyph, color_name)`` cell per jump. Jumps are assigned columns from
    the longest address span to the shortest. A column holds '┏' at the
    lower address, '┗' at the higher address and '┃' on the instructions in
    between; '━' cells connect a corner to the columns on its right. Unused
    cells are ``(' ', 'reset')``.

    Jump endpoints that are not part of *instructions* (for example a tail
    call into another function) are skipped instead of raising ``KeyError``;
    the part of the path that lies inside the function is still drawn.
    """
    # Longest spans first; sorted() is stable, so ties keep their jump order.
    spans = sorted(
        ((min(src, dst), max(src, dst)) for src, dst in jumps.items()),
        key=lambda span: span[1] - span[0],
        reverse=True)

    width = len(jumps)
    palette = list(colors)
    paths = {addr: [(' ', 'reset')] * width for addr in instructions}

    for column, (start_addr, end_addr) in enumerate(spans):
        color = palette[column % len(palette)]
        _draw_corner(paths, start_addr, column, '┏', color)
        # Walk the known instructions rather than every integer address in
        # the span, which may be huge for a jump to a distant target.
        for addr in instructions:
            if start_addr < addr < end_addr:
                paths[addr][column] = ('┃', color)
        _draw_corner(paths, end_addr, column, '┗', color)
    return paths
def find_basic_blocks(instructions, jumps):
    """Return the set of addresses at which a new basic block begins.

    For every jump in *jumps* (source address -> target address) both the
    instruction following the jump and the jump target start a block. A jump
    that is the last instruction of the listing has no successor, so only
    its target is recorded (this used to raise ``IndexError``). A source
    address that is not in *instructions* contributes only its target.
    """
    addresses = list(instructions)
    # Address -> position lookup, so each jump costs O(1) instead of a scan.
    position = {addr: index for index, addr in enumerate(addresses)}

    basic_blocks = set()
    for from_addr, to_addr in jumps.items():
        index = position.get(from_addr)
        if index is not None and index + 1 < len(addresses):
            basic_blocks.add(addresses[index + 1])
        basic_blocks.add(to_addr)
    return basic_blocks
###################################################################################################
# Two-operand instructions rendered as a compound assignment, tried in order.
_COMPOUND_ASSIGNMENTS = (
    ('add', '+='),
    ('sub', '-='),
    ('and', '&='),
    ('xor', '^='),
)


def _simplify_operands(pseudo, info):
    """Drop ``?WORD PTR`` size prefixes and resolve rip-relative operands.

    When *info* (the ``# ...`` comment of the disassembly line) contains a
    hexadecimal address, ``rip+0x...`` / ``rip-0x...`` in *pseudo* is
    replaced by that address. An *info* without an address leaves the
    operand untouched.
    """
    pseudo = re.sub(r'.WORD PTR \[(.+?)\]', r'[\1]', pseudo)
    if info:
        resolved = re.search(r'0[xX][0-9a-fA-F]+', info)
        if resolved:
            pseudo = re.sub(r'rip[+-]0[xX][0-9a-fA-F]+', resolved.group(0), pseudo)
    return pseudo


def to_pseudo(instruction, info):
    """Translate an instruction into a short pseudo-code string.

    *instruction* is the (stripped) instruction text and *info* its trailing
    ``# ...`` comment or None. Handles ``call``, ``mov``, ``lea``, ``add``,
    ``sub``, ``and`` and ``xor``; mnemonics are matched as whole words so that
    e.g. ``vpand`` is not mistaken for ``and``. Returns None for anything
    else.

    A ``call`` is rendered (in yellow) with the symbol name from ``<name>``
    when present and with the full operand otherwise, so indirect calls such
    as ``call QWORD PTR [rax]`` no longer fail with ``TypeError``.
    """
    call_match = re.search(r'\bcall\s+(?P<addr>.+?)(?:\s+<(?P<name>.+)>)?$', instruction)
    if call_match:
        # group('name') is None (not an IndexError) when no <symbol> follows.
        target = call_match.group('name') or call_match.group('addr')
        return colorize('call ' + target, 'yellow')

    mov_match = re.search(r'\bmov\s+(?P<dst>.+),(?P<src>.+)', instruction)
    if mov_match:
        pseudo = mov_match.group('dst') + ' := ' + mov_match.group('src')
        return _simplify_operands(pseudo, info)

    lea_match = re.search(r'\blea\s+(?P<dst>.+),\[(?P<src>.+)\]', instruction)
    if lea_match:
        pseudo = lea_match.group('dst') + ' := ' + lea_match.group('src')
        return _simplify_operands(pseudo, info)

    for mnemonic, operator in _COMPOUND_ASSIGNMENTS:
        match = re.search(r'\b' + mnemonic + r'\s+(?P<dst>.+),(?P<op>.+)', instruction)
        if match:
            return '%s %s %s' % (match.group('dst'), operator, match.group('op'))

    return None
###################################################################################################
# General-purpose registers that get a fixed colour, matched as whole words.
_REGISTER_RE = re.compile(r'\b(?P<reg>[re]?[abcd]x|[re][ds]i)\b')

# Colour per register family, keyed by the last two letters of the name.
_REGISTER_COLORS = {
    'ax': 'white',
    'bx': 'green',
    'cx': 'cyan',
    'dx': 'magenta',
    'di': 'yellow',
    'si': 'blue',
}


def colorize_registers(code):
    """Return *code* with the a/b/c/d and di/si registers colour-coded.

    Covers ``ax``/``eax``/``rax`` (and the b, c, d equivalents) plus
    ``edi``/``rdi`` and ``esi``/``rsi``. Names are matched on word boundaries
    so that fragments of symbol names (the ``ax`` in ``max``, the ``edi`` in
    ``redirect``) are left alone.
    """
    def paint(match):
        register = match.group('reg')
        return colorize(register, _REGISTER_COLORS[register[-2:]])

    return _REGISTER_RE.sub(paint, code)
###################################################################################################
class EnhanceCommand(gdb.Command):
    'Enhance disassembly of the function surrounding the pc of the selected frame.'
    # NOTE: GDB shows the class docstring as the command's help text, so it
    # is kept to the single user-facing sentence above.

    def __init__(self):
        # Registers the command under the name 'enhance'.
        super().__init__('enhance', gdb.COMMAND_SUPPORT, gdb.COMPLETE_NONE)

    def invoke(self, argument, from_tty):
        # Runs 'disassemble <argument>' and prints an annotated listing:
        # a jump-path gutter, separators between basic blocks, pseudo-code
        # and, when pseudo-code is shown, the original instruction in
        # bright green on the right.
        try:
            disassembly = gdb.execute('disassemble %s' % argument, to_string=True).split('\n')
        except Exception as e:
            # Command boundary: report the problem instead of a traceback.
            print(e)
            return

        instructions = parse_disassembly(disassembly)
        jumps = find_jumps(instructions)
        paths = find_paths(instructions, jumps)
        basic_blocks = find_basic_blocks(instructions, jumps)

        print(disassembly[0])  # 'Dump of assembler code for function *:'
        for curr, addr, _, instr, info in instructions.values():
            addr_str = hex(addr)
            gutter = paths[addr]

            if addr in basic_blocks:
                # Horizontal rule above the first instruction of a block;
                # jump paths crossing the rule stay visible as '┃'.
                prefix = '─' * (2 + 1 + len(addr_str) + 1)
                middle = ''.join(
                    colorize('┃', color) if glyph in ('┃', '┗') else '─'
                    for glyph, color in gutter)
                suffix = '─' * (columns - len(prefix) - len(jumps))
                print(prefix + middle + suffix)

            path = ''.join(colorize(glyph, color) for glyph, color in gutter)
            pseudo = to_pseudo(instr, info)
            code = colorize_registers(pseudo if pseudo else instr)
            # The raw instruction is repeated only next to pseudo-code.
            original = instr if pseudo else ''

            line = '{curr} {addr} {path} {code: <{width}} '.format(
                curr='=>' if curr else '  ',
                addr=addr_str,
                path=path,
                code=code,
                # Escape sequences take no screen space: widen the field by
                # their length so the visible text pads to 35 columns.
                width=35 + len(code) - len(decolorize(code)))

            room = columns - len(decolorize(line))
            if original and len(original) > room:
                # Clamp at zero: a negative slice bound would keep almost
                # the whole instruction instead of truncating it.
                original = original[:max(room - 1, 0)] + '…'
            line += '\033[92m' + original + '\033[0m'
            print(line)
        print(disassembly[-2])  # 'End of assembler dump.'


# Instantiate once so the 'enhance' command becomes available.
EnhanceCommand()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment