Last active
February 4, 2025 14:53
-
-
Save pablosanderman/3fc8e4bb2a7ef39e5ac9ec2bba5517e1 to your computer and use it in GitHub Desktop.
Performance Aware Programming Course Homework 2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from dataclasses import dataclass | |
from enum import IntEnum | |
from typing import Tuple | |
import struct | |
class OperationCode(IntEnum):
    """Masked first-byte patterns recognised by ``decode_operation``.

    The decoder compares ``first_byte & 0b11111100`` against these members,
    so each value is written as ``<six opcode bits>_<two cleared bits>``.
    """

    # Register/memory to/from register.
    MOV_REG_MEM_TO_REG_MEM = 0b100010_00
    # Immediate to register/memory.
    MOV_IMM_TO_REG_MEM = 0b110001_00
    # Immediate to register: the decoder re-reads bit 3 as the word flag and
    # bits 0-2 as the register, so four masked values cover this form.
    MOV_IMM_TO_REG_LOW_LOW = 0b101100_00
    MOV_IMM_TO_REG_LOW_HIGH = 0b101101_00
    MOV_IMM_TO_REG_HIGH_LOW = 0b101110_00
    MOV_IMM_TO_REG_HIGH_HIGH = 0b101111_00
    # Memory to accumulator / accumulator to memory.
    MOV_MEM_ACC_TO_MEM_ACC = 0b101000_00
    # Declared but not referenced by the decoder visible in this file.
    MOV_REG_MEM_TO_SEG = 0b100011_00
@dataclass
class Operation:
    """Mutable record that ``decode_operation`` fills in while decoding."""

    # First instruction byte with its two low bits masked off.
    code: int
    # Bit 1 of the first byte; selects the operand order in the output text.
    direction: bool
    # Operand-size flag; picks the upper (16-bit) half of REGISTER_TABLE.
    word: bool
    # Top two bits of the second byte (displacement / register mode).
    mode: int
    # First operand field and its rendered text.
    operand_one: int
    operand_one_code: str
    # Second operand field and its rendered text.
    operand_two: int
    operand_two_code: str
    # Most recently decoded displacement, address or immediate value.
    data: int
    # Last byte read from the stream.
    byte: int
    # Every byte consumed for this instruction, in read order.
    bytes: bytearray
# Register names indexed by ``field | (word << 3)``: entries 0-7 are the
# 8-bit registers, entries 8-15 the 16-bit registers.
REGISTER_TABLE = (
    "al cl dl bl ah ch dh bh".split()
    + "ax cx dx bx sp bp si di".split()
)

# Effective-address templates indexed by the r/m field. The trailing ``%s``
# receives the displacement text ('' or ' + N' / ' - N').
MEMORY_TABLE = [
    f"{base}%s"
    for base in (
        "bx + si",
        "bx + di",
        "bp + si",
        "bp + di",
        "si",
        "di",
        "bp",
        "bx",
    )
]

# Used to pad the hex-byte column when the byte dump is shown.
MAX_INSTRUCTION_BYTES = 6
def _format_memory_operand(rm: int, displacement=None) -> str:
    """Render the bracketed effective address for r/m field ``rm``.

    ``displacement`` is ``None`` when the addressing mode has no displacement;
    otherwise it is a signed integer shown as `` + N`` or `` - N``.
    """
    if displacement is None:
        suffix = ""
    elif displacement < 0:
        suffix = f" - {abs(displacement)}"
    else:
        suffix = f" + {displacement}"
    return f"[{MEMORY_TABLE[rm] % suffix}]"


def decode_operation(bytes_stream: bytearray, index: int) -> Tuple[Operation, str]:
    """Decode the single ``mov`` instruction starting at ``bytes_stream[index]``.

    Args:
        bytes_stream: Raw machine code.
        index: Offset of the instruction's first byte.

    Returns:
        ``(op, text)``. ``op.bytes`` holds every byte consumed, so the caller
        advances by ``len(op.bytes)``. ``text`` is the assembly line, or
        ``"unk"`` (with one byte consumed) when the opcode is not recognised.

    Raises:
        IndexError: The stream ends in the middle of the instruction.
    """
    op = Operation(
        code=0, direction=False, word=False, mode=0,
        operand_one=0, operand_one_code="", operand_two=0,
        operand_two_code="", data=0,
        byte=0, bytes=bytearray()
    )

    def next_byte() -> int:
        """Consume the next byte of this instruction and record it on ``op``."""
        position = index + len(op.bytes)
        if position >= len(bytes_stream):
            raise IndexError(
                f"instruction at offset {index} is truncated "
                f"after {len(op.bytes)} byte(s)"
            )
        op.bytes.append(bytes_stream[position])
        op.byte = op.bytes[-1]
        return op.byte

    first_byte = next_byte()
    op.code = first_byte & 0b11111100
    op.direction = bool(first_byte & 0b00000010)
    op.word = bool(first_byte & 0b00000001)

    if op.code == OperationCode.MOV_IMM_TO_REG_MEM and not op.direction:
        # 0xC4/0xC5 share the masked code with 0xC6/0xC7 but encode LES/LDS,
        # not an immediate mov, so they must not be decoded as one.
        return op, "unk"

    if op.code in (OperationCode.MOV_REG_MEM_TO_REG_MEM,
                   OperationCode.MOV_IMM_TO_REG_MEM):
        second_byte = next_byte()
        op.mode = second_byte >> 6
        op.operand_one = (second_byte >> 3) & 0b00000111
        op.operand_one_code = REGISTER_TABLE[op.operand_one | (op.word << 3)]
        op.operand_two = second_byte & 0b00000111

        if op.mode == 0b00 and op.operand_two == 0b110:
            # Mode 00 with r/m 110 is a direct 16-bit address.
            disp_low = next_byte()
            disp_high = next_byte()
            op.data = (disp_high << 8) | disp_low
            op.operand_two_code = f"[{op.data}]"
        elif op.mode == 0b00:
            # Memory mode, no displacement.
            op.operand_two_code = _format_memory_operand(op.operand_two)
        elif op.mode == 0b01:
            # Memory mode, signed 8-bit displacement.
            op.data = struct.unpack('<b', bytes([next_byte()]))[0]
            op.operand_two_code = _format_memory_operand(op.operand_two, op.data)
        elif op.mode == 0b10:
            # Memory mode, signed 16-bit displacement. '<' pins little-endian
            # so the result does not depend on the host's byte order.
            disp_low = next_byte()
            disp_high = next_byte()
            op.data = struct.unpack('<h', bytes([disp_low, disp_high]))[0]
            op.operand_two_code = _format_memory_operand(op.operand_two, op.data)
        else:
            # Register mode (no displacement).
            op.operand_two_code = REGISTER_TABLE[op.operand_two | (op.word << 3)]

        if op.code == OperationCode.MOV_IMM_TO_REG_MEM:
            # The immediate is always the source, so force destination-first
            # ordering and replace the register text with the sized literal.
            op.direction = False
            data_low = next_byte()
            if op.word:
                data_high = next_byte()
                op.data = data_low | (data_high << 8)
            else:
                op.data = data_low
            op.operand_one_code = f"{'word' if op.word else 'byte'} {op.data}"

        if op.direction:
            result = f"mov {op.operand_one_code}, {op.operand_two_code}"
        else:
            result = f"mov {op.operand_two_code}, {op.operand_one_code}"
        return op, result

    if op.code in (OperationCode.MOV_IMM_TO_REG_LOW_LOW,
                   OperationCode.MOV_IMM_TO_REG_LOW_HIGH,
                   OperationCode.MOV_IMM_TO_REG_HIGH_LOW,
                   OperationCode.MOV_IMM_TO_REG_HIGH_HIGH):
        # In this form the word flag is bit 3 and the register is bits 0-2
        # of the first byte.
        op.word = bool((first_byte >> 3) & 0b1)
        op.operand_one = first_byte & 0b00000111
        op.operand_one_code = REGISTER_TABLE[op.operand_one | (op.word << 3)]
        data_low = next_byte()
        if op.word:
            data_high = next_byte()
            op.data = struct.unpack('<h', bytes([data_low, data_high]))[0]
        else:
            op.data = struct.unpack('<b', bytes([data_low]))[0]
        return op, f"mov {op.operand_one_code}, {op.data}"

    if op.code == OperationCode.MOV_MEM_ACC_TO_MEM_ACC:
        addr_low = next_byte()
        addr_high = next_byte()
        op.data = (addr_high << 8) | addr_low
        op.operand_one = op.data
        op.operand_one_code = f'[{op.data}]'
        # The register operand is always the accumulator (field 000).
        op.operand_two = 0b000
        op.operand_two_code = REGISTER_TABLE[op.operand_two | (op.word << 3)]
        if op.direction:
            result = f"mov {op.operand_one_code}, {op.operand_two_code}"
        else:
            result = f"mov {op.operand_two_code}, {op.operand_one_code}"
        return op, result

    return op, "unk"
def disassemble(filename: str, show_address: bool = False, show_bytes: bool = False) -> str:
    """Disassemble the binary file ``filename`` into assembly text.

    Args:
        filename: Path of the machine-code file; it is read in binary mode.
        show_address: Prefix each instruction with its offset as ``0x%04x``.
        show_bytes: Add a padded hex dump of the instruction's bytes.

    Returns:
        The listing as one newline-joined string. It starts with a
        ``; <filename>`` comment line; the ``bits 16`` directive is emitted
        only when both optional columns are off.
    """
    with open(filename, 'rb') as binary_file:
        bytes_stream = bytearray(binary_file.read())

    output = [f"; {filename}\n"]
    if not (show_address or show_bytes):
        output.append("bits 16\n")

    offset = 0
    stream_length = len(bytes_stream)
    while offset < stream_length:
        op, text = decode_operation(bytes_stream, offset)

        columns = []
        if show_address:
            columns.append(f"0x{offset:04x}")
        if show_bytes:
            hex_dump = " ".join(f"{value:02x}" for value in op.bytes)
            # Each byte takes "xx " (3 columns) minus the final separator.
            padded = hex_dump.ljust(MAX_INSTRUCTION_BYTES * 3 - 1)
            columns.append(f"| {padded} |")
        columns.append(text)

        output.append(" ".join(columns))
        # Advance by however many bytes the decoder consumed.
        offset += len(op.bytes)

    return "\n".join(output)
def main():
    """Disassemble the hard-coded listing and print the result to stdout."""
    # Swap in 'listing_0039_more_movs' to run the simpler listing instead.
    listing = 'listing_0040_challenge_movs'
    print(disassemble(listing, show_address=False, show_bytes=False))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment