# pablosanderman/homework_0002.py

## homework_0002.py
from dataclasses import dataclass
from enum import IntEnum
from typing import Tuple
import struct


class OperationCode(IntEnum):
    """First-byte patterns of the MOV encodings known to this module.

    Every value has its two lowest bits cleared, matching the
    ``first_byte & 0b11111100`` mask that ``decode_operation`` applies
    before comparing against these members.
    """

    # Register/memory to/from register.
    MOV_REG_MEM_TO_REG_MEM = 0x88
    # Immediate to register/memory.
    MOV_IMM_TO_REG_MEM = 0xC4
    # Immediate to register: after masking, the sixteen first bytes
    # 0xB0-0xBF collapse onto these four values.
    MOV_IMM_TO_REG_LOW_LOW = 0xB0
    MOV_IMM_TO_REG_LOW_HIGH = 0xB4
    MOV_IMM_TO_REG_HIGH_LOW = 0xB8
    MOV_IMM_TO_REG_HIGH_HIGH = 0xBC
    # Memory to/from accumulator.
    MOV_MEM_ACC_TO_MEM_ACC = 0xA0
    # Register/memory to/from segment register.
    MOV_REG_MEM_TO_SEG = 0x8C


@dataclass
class Operation:
    """Mutable record of one decoded instruction.

    ``decode_operation`` creates an instance with zero/empty values and
    fills the fields in as it consumes bytes from the input stream.
    """

    # First instruction byte with its two lowest bits masked off.
    code: int
    # Bit 1 of the first byte; selects which operand is printed first.
    direction: bool
    # Width flag: True selects the 16-bit half of the register table.
    word: bool
    # Top two bits of the second instruction byte.
    mode: int
    # Numeric value and rendered text of the first operand.
    operand_one: int
    operand_one_code: str
    # Numeric value and rendered text of the second operand.
    operand_two: int
    operand_two_code: str
    # Most recently decoded displacement, address or immediate.
    data: int
    # Last byte read from the stream.
    byte: int
    # Every byte consumed for this instruction, in stream order.
    bytes: bytearray


# Register names indexed by ``field | (word << 3)``: entries 0-7 are the
# 8-bit registers, entries 8-15 the 16-bit ones.
REGISTER_TABLE = "al cl dl bl ah ch dh bh ax cx dx bx sp bp si di".split()

# Effective-address templates indexed by the 3-bit r/m field. The trailing
# ``%s`` is filled with the displacement text (or "" when there is none).
MEMORY_TABLE = [
    base + "%s"
    for base in (
        "bx + si",
        "bx + di",
        "bp + si",
        "bp + di",
        "si",
        "di",
        "bp",
        "bx",
    )
]

# Width, in bytes, that the raw-byte column of the listing is padded to.
MAX_INSTRUCTION_BYTES = 6


def _signed_le(raw: bytes) -> int:
    """Return *raw* read as a little-endian two's-complement integer."""
    # Explicit byte order: 8086 operands are little-endian regardless of
    # the machine the disassembler itself runs on.
    return int.from_bytes(raw, "little", signed=True)


def _effective_address(rm: int, displacement: int) -> str:
    """Render ``[<base> + d]`` or ``[<base> - d]`` for an r/m field."""
    sign = "-" if displacement < 0 else "+"
    suffix = f" {sign} {abs(displacement)}"
    return f"[{MEMORY_TABLE[rm] % suffix}]"


def _render_mov(op: Operation) -> str:
    """Build the ``mov`` text, ordering the operands by ``op.direction``."""
    if op.direction:
        return f"mov {op.operand_one_code}, {op.operand_two_code}"
    return f"mov {op.operand_two_code}, {op.operand_one_code}"


def decode_operation(bytes_stream: bytearray, index: int) -> Tuple[Operation, str]:
    """Decode the single instruction that starts at ``bytes_stream[index]``.

    Recognised encodings are the MOV forms register/memory <-> register,
    immediate -> register/memory, immediate -> register and
    memory <-> accumulator. Anything else is reported as ``"unk"`` with
    only the first byte consumed.

    Args:
        bytes_stream: Raw machine code.
        index: Offset of the instruction's first byte.

    Returns:
        A tuple ``(op, text)``. ``op.bytes`` holds every byte consumed, so
        the next instruction starts at ``index + len(op.bytes)``; ``text``
        is the assembly line, or ``"unk"`` for an unrecognised first byte.

    Raises:
        IndexError: If the stream ends before the instruction is complete.
    """
    op = Operation(
        code=0, direction=False, word=False, mode=0,
        operand_one=0, operand_one_code="", operand_two=0,
        operand_two_code="", data=0,
        byte=0, bytes=bytearray()
    )

    def next_byte() -> int:
        # The number of bytes already consumed is the offset of the next one.
        op.bytes.append(bytes_stream[index + len(op.bytes)])
        op.byte = op.bytes[-1]
        return op.byte

    first_byte = next_byte()
    op.code = first_byte & 0b11111100
    op.direction = bool(first_byte & 0b00000010)
    op.word = bool(first_byte & 0b00000001)

    if op.code in (OperationCode.MOV_REG_MEM_TO_REG_MEM,
                   OperationCode.MOV_IMM_TO_REG_MEM):
        immediate_form = op.code == OperationCode.MOV_IMM_TO_REG_MEM
        if immediate_form and not op.direction:
            # The 6-bit mask also matches 0xC4/0xC5, which are LES/LDS;
            # only 0xC6/0xC7 (bit 1 set) encode MOV immediate.
            return op, "unk"

        second_byte = next_byte()
        op.mode = second_byte >> 6
        op.operand_one = (second_byte >> 3) & 0b00000111
        op.operand_one_code = REGISTER_TABLE[op.operand_one | (op.word << 3)]
        op.operand_two = second_byte & 0b00000111

        if op.mode == 0b00:
            # Memory mode without displacement, except r/m 0b110, which is
            # a direct 16-bit address.
            if op.operand_two == 0b110:
                disp_low = next_byte()
                disp_high = next_byte()
                op.data = (disp_high << 8) | disp_low
                op.operand_two_code = f"[{op.data}]"
            else:
                op.operand_two_code = f"[{MEMORY_TABLE[op.operand_two] % ''}]"
        elif op.mode == 0b01:
            # Memory mode, sign-extended 8-bit displacement.
            op.data = _signed_le(bytes([next_byte()]))
            op.operand_two_code = _effective_address(op.operand_two, op.data)
        elif op.mode == 0b10:
            # Memory mode, signed 16-bit displacement (low byte first).
            disp_low = next_byte()
            disp_high = next_byte()
            op.data = _signed_le(bytes([disp_low, disp_high]))
            op.operand_two_code = _effective_address(op.operand_two, op.data)
        else:
            # Mode 0b11: register mode, no displacement.
            op.operand_two_code = REGISTER_TABLE[op.operand_two | (op.word << 3)]

        if immediate_form:
            # The immediate follows any displacement bytes; the destination
            # is always the r/m operand, so force it to be printed first.
            op.direction = False
            data_low = next_byte()
            if op.word:
                data_high = next_byte()
                op.data = data_low | (data_high << 8)
            else:
                op.data = data_low
            op.operand_one_code = f"{'word' if op.word else 'byte'} {op.data}"

        return op, _render_mov(op)

    if op.code in (OperationCode.MOV_IMM_TO_REG_LOW_LOW,
                   OperationCode.MOV_IMM_TO_REG_LOW_HIGH,
                   OperationCode.MOV_IMM_TO_REG_HIGH_LOW,
                   OperationCode.MOV_IMM_TO_REG_HIGH_HIGH):
        # This encoding keeps the width flag in bit 3 and the register in
        # the low three bits of the first byte.
        op.word = bool((first_byte >> 3) & 0b1)
        op.operand_one = first_byte & 0b00000111
        op.operand_one_code = REGISTER_TABLE[op.operand_one | (op.word << 3)]
        raw = [next_byte()]
        if op.word:
            raw.append(next_byte())
        op.data = _signed_le(bytes(raw))
        return op, f"mov {op.operand_one_code}, {op.data}"

    if op.code == OperationCode.MOV_MEM_ACC_TO_MEM_ACC:
        addr_low = next_byte()
        addr_high = next_byte()
        op.data = (addr_high << 8) | addr_low
        op.operand_one = op.data
        op.operand_one_code = f"[{op.data}]"
        # Register index 0 is the accumulator (al / ax).
        op.operand_two = 0b000
        op.operand_two_code = REGISTER_TABLE[op.operand_two | (op.word << 3)]
        return op, _render_mov(op)

    return op, "unk"


def disassemble(filename: str, show_address: bool = False, show_bytes: bool = False) -> str:
    """Disassemble the binary file *filename* into a text listing.

    The listing starts with a ``; <filename>`` comment line. When neither
    optional column is requested a ``bits 16`` directive follows, then one
    line per decoded instruction.

    Args:
        filename: Path of the machine-code file to read.
        show_address: Prefix each line with the instruction's offset as
            ``0x`` plus four hex digits.
        show_bytes: Add a ``| .. |`` column holding the instruction's raw
            bytes in hex, padded to ``MAX_INSTRUCTION_BYTES`` bytes wide.

    Returns:
        The complete listing as a single newline-joined string.
    """
    with open(filename, 'rb') as source:
        bytes_stream = bytearray(source.read())

    output = [f"; {filename}\n"]
    if not (show_address or show_bytes):
        output.append("bits 16\n")

    # Two hex digits per byte plus a single space between neighbours.
    byte_column_width = MAX_INSTRUCTION_BYTES * 3 - 1

    offset = 0
    while offset < len(bytes_stream):
        op, text = decode_operation(bytes_stream, offset)

        columns = []
        if show_address:
            columns.append(f"0x{offset:04x}")
        if show_bytes:
            raw_hex = " ".join(f"{value:02x}" for value in op.bytes)
            columns.append(f"| {raw_hex.ljust(byte_column_width)} |")
        columns.append(text)
        output.append(" ".join(columns))

        # Step over exactly the bytes the decoder consumed.
        offset += len(op.bytes)

    return "\n".join(output)


def main(filename: str = 'listing_0040_challenge_movs') -> None:
    """Disassemble *filename* and print the listing to standard output.

    Args:
        filename: Path of the assembled binary to disassemble. Defaults to
            the challenge listing; pass another path, for example
            ``'listing_0039_more_movs'``, to disassemble a different file.
    """
    result = disassemble(filename, show_address=False, show_bytes=False)
    print(result)


if __name__ == "__main__":
    main()
	from dataclasses import dataclass
	from enum import IntEnum
	from typing import Tuple
	import struct


	class OperationCode(IntEnum):
	MOV_REG_MEM_TO_REG_MEM = 0b10001000
	MOV_IMM_TO_REG_MEM = 0b11000100
	MOV_IMM_TO_REG_LOW_LOW = 0b10110000
	MOV_IMM_TO_REG_LOW_HIGH = 0b10110100
	MOV_IMM_TO_REG_HIGH_LOW = 0b10111000
	MOV_IMM_TO_REG_HIGH_HIGH = 0b10111100
	MOV_MEM_ACC_TO_MEM_ACC = 0b10100000
	MOV_REG_MEM_TO_SEG = 0b10001100


	@dataclass
	class Operation:
	code: int
	direction: bool
	word: bool
	mode: int
	operand_one: int
	operand_one_code: str
	operand_two: int
	operand_two_code: str
	data: int
	byte: int
	bytes: bytearray


	REGISTER_TABLE = [

	"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
	"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"
	]

	MEMORY_TABLE = [
	"bx + si%s",
	"bx + di%s",
	"bp + si%s",
	"bp + di%s",
	"si%s",
	"di%s",
	"bp%s",
	"bx%s"
	]

	MAX_INSTRUCTION_BYTES = 6


	def decode_operation(bytes_stream: bytearray, index: int) -> Tuple[Operation, str]:
	op = Operation(
	code=0, direction=False, word=False, mode=0,
	operand_one=0, operand_one_code="", operand_two=0,
	operand_two_code="", data=0,
	byte=0, bytes=bytearray()
	)

	def next_byte():
	if not op.bytes:
	op.bytes = bytearray([bytes_stream[index]])
	else:
	op.bytes.append(bytes_stream[len(op.bytes) + index])
	op.byte = op.bytes[-1]
	return op.byte

	first_byte = next_byte()
	op.code = first_byte & 0b11111100
	op.direction = bool(first_byte & 0b00000010)
	op.word = bool(first_byte & 0b00000001)
	result = ""

	if op.code == OperationCode.MOV_REG_MEM_TO_REG_MEM or op.code == OperationCode.MOV_IMM_TO_REG_MEM:
	second_byte = next_byte()
	op.mode = second_byte >> 6
	op.operand_one = (second_byte >> 3) & 0b00000111
	op.operand_one_code = REGISTER_TABLE[op.operand_one \| (op.word << 3)]
	op.operand_two = second_byte & 0b00000111

	direct_access = False
	if op.mode == 0b00: # memory mode: no displacement, except if r/m 0b110
	op.operand_one
	if op.operand_two == 0b110:
	direct_access = True
	op.operand_two_code = '[%s]'
	disp_low = next_byte()
	disp_high = next_byte()
	op.data = disp_high << 8 \| disp_low
	else:
	op.operand_two_code = f"[{MEMORY_TABLE[op.operand_two] % ''}]"
	if op.mode == 0b01:
	disp_low = next_byte()
	op.data = struct.unpack('b', bytes([disp_low]))[0]
	if op.data < 0: # memory mode: 8-bit displacement
	op.operand_two_code = f"[{
	MEMORY_TABLE[op.operand_two] % f' - {abs(op.data)}'}]"
	else:
	op.operand_two_code = f"[{
	MEMORY_TABLE[op.operand_two] % f' + {op.data}'}]"
	if op.mode == 0b10: # memory mode: 16-bit displacement
	disp_low = next_byte()
	disp_high = next_byte()
	op.data = struct.unpack('h', bytes([disp_low, disp_high]))[0]
	if op.data < 0:
	op.operand_two_code = f"[{
	MEMORY_TABLE[op.operand_two] % f' - {abs(op.data)}'}]"
	else:
	op.operand_two_code = f"[{
	MEMORY_TABLE[op.operand_two] % f' + {op.data}'}]"
	if op.mode == 0b11: # register mode (no displacement)
	op.operand_two_code = REGISTER_TABLE[op.operand_two \| (
	op.word << 3)]

	if direct_access:
	op.operand_two_code = op.operand_two_code % str(op.data)

	if op.code == OperationCode.MOV_IMM_TO_REG_MEM:
	op.direction = False
	if op.word:
	data_low = next_byte()
	data_high = next_byte()
	op.data = data_low \| (data_high << 8)
	else:
	data_low = next_byte()
	op.data = data_low
	op.operand_one_code = f"{'word' if op.word else 'byte'} {op.data}"

	if op.direction:
	result = f"mov {op.operand_one_code}, {op.operand_two_code}"
	else:
	result = f"mov {op.operand_two_code}, {op.operand_one_code}"

	elif op.code in (OperationCode.MOV_IMM_TO_REG_LOW_LOW, OperationCode.MOV_IMM_TO_REG_LOW_HIGH,
	OperationCode.MOV_IMM_TO_REG_HIGH_LOW, OperationCode.MOV_IMM_TO_REG_HIGH_HIGH):
	op.word = bool((first_byte >> 3) & 0b1)
	op.operand_one = first_byte & 0b00000111
	op.operand_one_code = REGISTER_TABLE[op.operand_one \| (op.word << 3)]
	data_low = next_byte()
	if op.word:
	data_high = next_byte()
	op.data = struct.unpack('h', bytes([data_low, data_high]))[0]
	else:
	op.data = struct.unpack('b', bytes([data_low]))[0]
	result = f"mov {op.operand_one_code}, {op.data}"

	elif op.code == OperationCode.MOV_MEM_ACC_TO_MEM_ACC:
	addr_low = next_byte()
	addr_high = next_byte()
	op.data = (addr_high << 8) \| addr_low
	op.operand_one = op.data
	op.operand_one_code = f'[{op.data}]'
	op.operand_two_code = 0b000
	op.operand_two_code = REGISTER_TABLE[op.operand_two \| (op.word << 3)]
	if op.direction:
	result = f"mov {op.operand_one_code}, {op.operand_two_code}"
	else:
	result = f"mov {op.operand_two_code}, {op.operand_one_code}"
	else:
	return op, "unk"

	return op, result


	def disassemble(filename: str, show_address: bool = False, show_bytes: bool = False) -> str:
	with open(filename, 'rb') as f:
	bytes_stream = bytearray(f.read())

	lines = []
	lines.append(f"; {filename}\n")
	if not show_address and not show_bytes:
	lines.append("bits 16\n")

	i = 0
	while i < len(bytes_stream):
	op, decoded_instruction = decode_operation(bytes_stream, i)

	line_parts = []
	if show_address:
	line_parts.append(f"0x{i:04x}")

	if show_bytes:
	bytes_str = " ".join([f"{b:02x}" for b in op.bytes])
	bytes_str = bytes_str.ljust(MAX_INSTRUCTION_BYTES * 3 - 1)
	line_parts.append(f"\| {bytes_str} \|")

	line_parts.append(decoded_instruction)
	lines.append(" ".join(line_parts))

	i += len(op.bytes)

	return "\n".join(lines)


	def main():
	# filename = 'listing_0039_more_movs'
	filename = 'listing_0040_challenge_movs'
	result = disassemble(filename,
	show_address=False, show_bytes=False)
	print(result)


	if __name__ == "__main__":
	main()