Skip to content

Instantly share code, notes, and snippets.

@pablosanderman
Last active February 4, 2025 14:53
Show Gist options
  • Save pablosanderman/3fc8e4bb2a7ef39e5ac9ec2bba5517e1 to your computer and use it in GitHub Desktop.
Save pablosanderman/3fc8e4bb2a7ef39e5ac9ec2bba5517e1 to your computer and use it in GitHub Desktop.
Performance Aware Programming Course Homework 2
from dataclasses import dataclass
from enum import IntEnum
from typing import Tuple
import struct
class OperationCode(IntEnum):
    """Opcode patterns for the 8086 ``mov`` encodings this disassembler knows.

    Every value is the first instruction byte with its two low bits cleared
    (``first_byte & 0b11111100``), which is how ``decode_operation`` computes
    ``Operation.code`` before comparing against these members.
    """

    # 100010dw: register/memory to/from register.
    MOV_REG_MEM_TO_REG_MEM = 0b10001000
    # 1100011w: immediate to register/memory (w bit and bit 1 masked off).
    MOV_IMM_TO_REG_MEM = 0b11000100
    # 1011wreg: immediate to register.  The w bit and the top bit of reg
    # survive the mask, so the single encoding shows up as four values.
    MOV_IMM_TO_REG_LOW_LOW = 0b10110000
    MOV_IMM_TO_REG_LOW_HIGH = 0b10110100
    MOV_IMM_TO_REG_HIGH_LOW = 0b10111000
    MOV_IMM_TO_REG_HIGH_HIGH = 0b10111100
    # 101000dw: memory to accumulator / accumulator to memory.
    MOV_MEM_ACC_TO_MEM_ACC = 0b10100000
    # 100011d0: register/memory to/from segment register.
    # NOTE(review): declared but not decoded by decode_operation.
    MOV_REG_MEM_TO_SEG = 0b10001100
@dataclass
class Operation:
    """Mutable scratch record describing one decoded instruction.

    ``decode_operation`` creates an instance with zeroed fields and fills it
    in as it consumes bytes; ``disassemble`` reads ``bytes`` to know how far
    to advance and what to print in the hex column.
    """

    # First instruction byte with the two low bits masked off.
    code: int
    # Bit 1 of the first byte (the "d" bit); forced to False for
    # immediate-to-register/memory moves.
    direction: bool
    # True when the instruction operates on 16-bit data (the "w" bit).
    word: bool
    # Top two bits of the second byte (the "mod" field).
    mode: int
    # Bits 5..3 of the second byte (the "reg" field), the register number
    # taken from the opcode byte, or the direct address for accumulator moves.
    operand_one: int
    # Assembly text for operand_one.
    operand_one_code: str
    # Low three bits of the second byte (the "r/m" field).
    operand_two: int
    # Assembly text for operand_two.
    operand_two_code: str
    # Most recently decoded displacement, address, or immediate value.
    data: int
    # Last byte consumed from the stream.
    byte: int
    # Every byte consumed for this instruction, in order.
    bytes: bytearray
# Register mnemonics indexed by ``reg | (w << 3)``: the first eight entries
# are the byte registers (w = 0), the last eight the word registers (w = 1).
REGISTER_TABLE = (
    ["al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"]
    + ["ax", "cx", "dx", "bx", "sp", "bp", "si", "di"]
)
# Effective-address templates indexed by the r/m field.  Each entry ends in a
# ``%s`` slot that the decoder fills with "" or a " + n" / " - n" displacement.
MEMORY_TABLE = [
    f"{base}%s"
    for base in (
        "bx + si",
        "bx + di",
        "bp + si",
        "bp + di",
        "si",
        "di",
        "bp",
        "bx",
    )
]
# Longest instruction decode_operation can consume: opcode + mod/reg/rm byte
# + 16-bit displacement + 16-bit immediate.  disassemble() uses it to size the
# hex-byte column so the mnemonics line up.
MAX_INSTRUCTION_BYTES = 6
def _format_displaced_address(rm: int, displacement: int) -> str:
    """Return ``[base + n]`` / ``[base - n]`` for the address row picked by *rm*."""
    sign = "-" if displacement < 0 else "+"
    suffix = f" {sign} {abs(displacement)}"
    return f"[{MEMORY_TABLE[rm] % suffix}]"


def decode_operation(bytes_stream: bytearray, index: int) -> Tuple[Operation, str]:
    """Decode the single 8086 ``mov`` instruction starting at ``bytes_stream[index]``.

    Handles register/memory to/from register, immediate to register/memory,
    immediate to register, and memory to/from accumulator.

    Args:
        bytes_stream: Raw machine code.
        index: Offset of the instruction's first byte within *bytes_stream*.

    Returns:
        ``(op, text)`` where ``op.bytes`` holds every byte consumed (so the
        caller can advance by ``len(op.bytes)``) and *text* is the assembly
        line.  For an unrecognised first byte, *text* is ``"unk"`` and exactly
        one byte has been consumed.

    Raises:
        IndexError: If the stream ends before the instruction is complete.
    """
    op = Operation(
        code=0, direction=False, word=False, mode=0,
        operand_one=0, operand_one_code="", operand_two=0,
        operand_two_code="", data=0,
        byte=0, bytes=bytearray(),
    )

    def next_byte() -> int:
        """Consume one byte from the stream and record it on ``op``."""
        op.bytes.append(bytes_stream[index + len(op.bytes)])
        op.byte = op.bytes[-1]
        return op.byte

    def next_word() -> int:
        """Consume a little-endian 16-bit value (low byte first), unsigned."""
        low = next_byte()
        high = next_byte()
        return low | (high << 8)

    def as_signed(value: int, bits: int) -> int:
        """Reinterpret an unsigned *bits*-wide value as two's complement."""
        return value - (1 << bits) if value & (1 << (bits - 1)) else value

    first_byte = next_byte()
    op.code = first_byte & 0b11111100
    op.direction = bool(first_byte & 0b00000010)
    op.word = bool(first_byte & 0b00000001)

    # Immediate-to-register/memory is 1100011w.  Comparing only the masked
    # code would also accept 0xC4/0xC5 (LES/LDS), so check bit 1 explicitly.
    is_imm_to_reg_mem = (first_byte & 0b11111110) == 0b11000110

    if op.code == OperationCode.MOV_REG_MEM_TO_REG_MEM or is_imm_to_reg_mem:
        second_byte = next_byte()
        op.mode = second_byte >> 6
        op.operand_one = (second_byte >> 3) & 0b00000111
        op.operand_one_code = REGISTER_TABLE[op.operand_one | (op.word << 3)]
        op.operand_two = second_byte & 0b00000111

        if op.mode == 0b00:
            # Memory mode, no displacement -- except r/m 0b110, which means
            # a 16-bit direct address follows.
            if op.operand_two == 0b110:
                op.data = next_word()
                op.operand_two_code = f"[{op.data}]"
            else:
                op.operand_two_code = f"[{MEMORY_TABLE[op.operand_two] % ''}]"
        elif op.mode == 0b01:
            # Memory mode, signed 8-bit displacement.
            op.data = as_signed(next_byte(), 8)
            op.operand_two_code = _format_displaced_address(op.operand_two, op.data)
        elif op.mode == 0b10:
            # Memory mode, signed 16-bit displacement.
            op.data = as_signed(next_word(), 16)
            op.operand_two_code = _format_displaced_address(op.operand_two, op.data)
        else:
            # Register mode (no displacement).
            op.operand_two_code = REGISTER_TABLE[op.operand_two | (op.word << 3)]

        if is_imm_to_reg_mem:
            # Bit 1 is part of the opcode here, not a direction flag: the
            # destination is always the r/m operand.
            op.direction = False
            op.data = next_word() if op.word else next_byte()
            # The explicit size keyword disambiguates stores to memory.
            op.operand_one_code = f"{'word' if op.word else 'byte'} {op.data}"

        if op.direction:
            result = f"mov {op.operand_one_code}, {op.operand_two_code}"
        else:
            result = f"mov {op.operand_two_code}, {op.operand_one_code}"

    elif op.code in (OperationCode.MOV_IMM_TO_REG_LOW_LOW, OperationCode.MOV_IMM_TO_REG_LOW_HIGH,
                     OperationCode.MOV_IMM_TO_REG_HIGH_LOW, OperationCode.MOV_IMM_TO_REG_HIGH_HIGH):
        # 1011wreg: here the w bit lives in bit 3 and reg in the low three bits.
        op.word = bool((first_byte >> 3) & 0b1)
        op.operand_one = first_byte & 0b00000111
        op.operand_one_code = REGISTER_TABLE[op.operand_one | (op.word << 3)]
        if op.word:
            op.data = as_signed(next_word(), 16)
        else:
            op.data = as_signed(next_byte(), 8)
        result = f"mov {op.operand_one_code}, {op.data}"

    elif op.code == OperationCode.MOV_MEM_ACC_TO_MEM_ACC:
        op.data = next_word()
        op.operand_one = op.data
        op.operand_one_code = f'[{op.data}]'
        # The register operand is implicitly the accumulator (al / ax).
        op.operand_two = 0b000
        op.operand_two_code = REGISTER_TABLE[op.operand_two | (op.word << 3)]
        if op.direction:
            result = f"mov {op.operand_one_code}, {op.operand_two_code}"
        else:
            result = f"mov {op.operand_two_code}, {op.operand_one_code}"

    else:
        return op, "unk"

    return op, result
def disassemble(filename: str, show_address: bool = False, show_bytes: bool = False) -> str:
    """Disassemble the binary file *filename* into assembly text.

    Args:
        filename: Path of the machine-code file; read in binary mode.
        show_address: Prefix each instruction with its offset as ``0x%04x``.
        show_bytes: Add a ``| .. .. |`` column with the instruction's bytes in
            hex, padded to ``MAX_INSTRUCTION_BYTES`` entries.

    Returns:
        The listing as one string.  It starts with a ``; <filename>`` comment;
        a ``bits 16`` directive follows only when neither extra column is
        requested.

    Raises:
        OSError: If the file cannot be opened or read.
        IndexError: Propagated from ``decode_operation`` when the file ends in
            the middle of an instruction.
    """
    with open(filename, 'rb') as f:
        bytes_stream = bytearray(f.read())

    # The trailing "\n" on header lines yields a blank line after each once
    # everything is joined with "\n" below.
    lines = [f"; {filename}\n"]
    if not show_address and not show_bytes:
        lines.append("bits 16\n")

    # Two hex digits per byte plus single-space separators between them.
    bytes_column_width = MAX_INSTRUCTION_BYTES * 3 - 1

    i = 0
    while i < len(bytes_stream):
        op, decoded_instruction = decode_operation(bytes_stream, i)
        line_parts = []
        if show_address:
            line_parts.append(f"0x{i:04x}")
        if show_bytes:
            bytes_str = " ".join(f"{b:02x}" for b in op.bytes)
            line_parts.append(f"| {bytes_str.ljust(bytes_column_width)} |")
        line_parts.append(decoded_instruction)
        lines.append(" ".join(line_parts))
        # decode_operation always consumes at least one byte, so this advances.
        i += len(op.bytes)
    return "\n".join(lines)
def main(filename: str = 'listing_0040_challenge_movs'):
    """Disassemble *filename* and print the listing to stdout.

    Args:
        filename: Path of the binary to disassemble.  Defaults to the
            challenge listing; pass e.g. ``'listing_0039_more_movs'`` to
            disassemble a different file.
    """
    result = disassemble(filename,
                         show_address=False, show_bytes=False)
    print(result)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment