kddnewton/assembler.rs

## assembler.rs
/// The opcode of an IR instruction.
///
/// Every `Insn` stores one of these in its `op` field.
///
/// The derives make opcodes cheap to copy, comparable in assertions and
/// printable with `{:?}` when inspecting generated instructions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Op {
    /// Recorded by `Assembler::add`.
    Add,
    Sub,
    Mov,
}

/// A reference to a register.
///
/// Values of this type are handed to the `Assembler` while the IR is being
/// generated. Once stored in an `Insn` as part of a generated instruction they
/// are wrapped in `Opnd::Reg`.
///
/// `Copy` lets the same register be named in several instructions without
/// rebuilding the struct; `Debug` and `PartialEq` make it usable with `{:?}`
/// and `assert_eq!`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct Reg {
    /// Index of the register (presumably its number on the target; confirm).
    idx: u8,
    /// Number of bits (presumably the width of the register; confirm).
    num_bits: u8,
}

/// A reference to a memory location.
///
/// Like `Reg`, it is passed around while the IR is being generated. When it is
/// stored in an `Insn` as part of a generated instruction, it is wrapped in
/// `Opnd::Mem`.
struct Mem {
    /// Register the location is expressed relative to.
    base: Reg,
    /// Signed displacement (presumably an offset added to `base`; confirm).
    disp: i32,
    /// Number of bits (presumably the size of the access; confirm).
    num_bits: u8,
}

/// An operand of a generated IR instruction.
///
/// The raw values being used (immediates, `Reg`, `Mem`) are wrapped in this
/// enum so that, once an instruction has been generated, all of its operands
/// share a single type.
enum Opnd {
    /// Signed 64-bit immediate.
    Imm(i64),
    /// Unsigned 64-bit immediate.
    UImm(u64),
    /// Register operand.
    Reg(Reg),
    /// Memory operand.
    Mem(Mem),
}

/// Operand sets that may be passed to the `Assembler`'s `add` function.
///
/// The alternative would be to take an `Opnd` for each argument and match on
/// which combinations are allowed. In this design the set of operands is
/// instead a generic type that must implement this trait. Only types with an
/// implementation are accepted by `add`, so unsupported combinations are
/// rejected at compile time.
trait IntoAddOpnds {
    /// Consumes the operand set and returns it as a list of `Opnd` values.
    fn into_add_opnds(self) -> Vec<Opnd>;
}

/// Allows a `(Reg, i64)` pair as the operands of the add instruction.
///
/// Further combinations, e.g. `(i64, i64)` for two immediates, could be
/// allowed the same way by adding another implementation. Because only the
/// implemented combinations are accepted, the add function does not need to
/// match against its operands: rather than a deny-list (e.g. calling
/// `unreachable!()` inside the add function) this is an allow-list.
impl IntoAddOpnds for (Reg, i64) {
    /// Wraps the register in `Opnd::Reg` and the integer in `Opnd::Imm`, in
    /// that order.
    fn into_add_opnds(self) -> Vec<Opnd> {
        let (reg, imm) = self;
        vec![Opnd::Reg(reg), Opnd::Imm(imm)]
    }
}

/// A built IR instruction.
///
/// The `Assembler` stores these as a flat list. By the time an instruction has
/// been built, all of its operands have been normalized to the `Opnd` enum,
/// which makes the instruction easier to work with and debug.
struct Insn {
    /// Opcode of the instruction.
    pub op: Op,
    /// Operands of the instruction, in the order they were supplied.
    pub opnds: Vec<Opnd>,
}

impl Insn {
    /// Builds an instruction from an opcode and its operands.
    fn new(op: Op, opnds: Vec<Opnd>) -> Self {
        Self { op, opnds }
    }
}

/// The assembler that is passed around the code generation functions.
///
/// For now it only holds a flat list of instructions; in the future it could
/// also store live ranges, comments, additional metadata, etc.
struct Assembler {
    /// Instructions in the order they were pushed.
    insns: Vec<Insn>,
}

impl Assembler {
    /// Creates an assembler that holds no instructions.
    fn new() -> Self {
        Self { insns: Vec::new() }
    }

    /// Appends an `Op::Add` instruction built from `opnds`.
    ///
    /// `opnds` may be any type that knows how to convert itself into a
    /// `Vec<Opnd>` through `IntoAddOpnds`, which restricts callers to a known
    /// set of operand types. More can be introduced as they become supported,
    /// and the list of implementations doubles as explicit documentation of
    /// which types are allowed.
    fn add<T>(&mut self, opnds: T)
    where
        T: IntoAddOpnds,
    {
        let insn = Insn::new(Op::Add, opnds.into_add_opnds());
        self.insns.push(insn);
    }
}

/// Checks that `Assembler::add` records an `Op::Add` instruction whose
/// operands are the register and the immediate that were passed in.
#[test]
fn test_add() {
    let mut asm = Assembler::new();

    // This call compiles because (Reg, i64) implements IntoAddOpnds.
    asm.add((Reg { idx: 0, num_bits: 8 }, 1));

    // `matches!` is used so the checks do not require PartialEq or Debug on
    // the IR types.
    assert_eq!(asm.insns.len(), 1);
    let insn = &asm.insns[0];
    assert!(matches!(insn.op, Op::Add));
    assert!(matches!(
        insn.opnds.as_slice(),
        [Opnd::Reg(Reg { idx: 0, num_bits: 8 }), Opnd::Imm(1)]
    ));

    // Negative example, kept as a comment because leaving it in would stop
    // the whole test build from compiling: (Reg, u64) does not implement
    // IntoAddOpnds, so the call below is rejected at compile time.
    //
    //     let opnd2: u64 = 2;
    //     asm.add((Reg { idx: 0, num_bits: 8 }, opnd2));
}
	/// The opcode of an IR instruction.
	///
	/// Every `Insn` stores one of these in its `op` field.
	///
	/// The derives make opcodes cheap to copy, comparable in assertions and
	/// printable with `{:?}` when inspecting generated instructions.
	#[derive(Debug, Clone, Copy, PartialEq, Eq)]
	enum Op {
	    /// Recorded by `Assembler::add`.
	    Add,
	    Sub,
	    Mov,
	}

	/// A reference to a register.
	///
	/// Values of this type are handed to the `Assembler` while the IR is being
	/// generated. Once stored in an `Insn` as part of a generated instruction
	/// they are wrapped in `Opnd::Reg`.
	///
	/// `Copy` lets the same register be named in several instructions without
	/// rebuilding the struct; `Debug` and `PartialEq` make it usable with
	/// `{:?}` and `assert_eq!`.
	#[derive(Debug, Clone, Copy, PartialEq, Eq)]
	struct Reg {
	    /// Index of the register (presumably its number on the target; confirm).
	    idx: u8,
	    /// Number of bits (presumably the width of the register; confirm).
	    num_bits: u8,
	}

	/// A reference to a memory location.
	///
	/// Like `Reg`, it is passed around while the IR is being generated. When it
	/// is stored in an `Insn` as part of a generated instruction, it is wrapped
	/// in `Opnd::Mem`.
	struct Mem {
	/// Register the location is expressed relative to.
	base: Reg,
	/// Signed displacement (presumably an offset added to `base`; confirm).
	disp: i32,
	/// Number of bits (presumably the size of the access; confirm).
	num_bits: u8,
	}

	/// An operand of a generated IR instruction.
	///
	/// The raw values being used (immediates, `Reg`, `Mem`) are wrapped in this
	/// enum so that, once an instruction has been generated, all of its
	/// operands share a single type.
	enum Opnd {
	/// Signed 64-bit immediate.
	Imm(i64),
	/// Unsigned 64-bit immediate.
	UImm(u64),
	/// Register operand.
	Reg(Reg),
	/// Memory operand.
	Mem(Mem),
	}

	/// Operand sets that may be passed to the `Assembler`'s `add` function.
	///
	/// The alternative would be to take an `Opnd` for each argument and match
	/// on which combinations are allowed. In this design the set of operands is
	/// instead a generic type that must implement this trait. Only types with
	/// an implementation are accepted by `add`, so unsupported combinations are
	/// rejected at compile time.
	trait IntoAddOpnds {
	/// Consumes the operand set and returns it as a list of `Opnd` values.
	fn into_add_opnds(self) -> Vec<Opnd>;
	}

	/// Allows a `(Reg, i64)` pair as the operands of the add instruction.
	///
	/// Further combinations, e.g. `(i64, i64)` for two immediates, could be
	/// allowed the same way by adding another implementation. Because only the
	/// implemented combinations are accepted, the add function does not need to
	/// match against its operands: rather than a deny-list (e.g. calling
	/// `unreachable!()` inside the add function) this is an allow-list.
	impl IntoAddOpnds for (Reg, i64) {
	    /// Wraps the register in `Opnd::Reg` and the integer in `Opnd::Imm`,
	    /// in that order.
	    fn into_add_opnds(self) -> Vec<Opnd> {
	        let (reg, imm) = self;
	        vec![Opnd::Reg(reg), Opnd::Imm(imm)]
	    }
	}

	/// A built IR instruction.
	///
	/// The `Assembler` stores these as a flat list. By the time an instruction
	/// has been built, all of its operands have been normalized to the `Opnd`
	/// enum, which makes the instruction easier to work with and debug.
	struct Insn {
	    /// Opcode of the instruction.
	    pub op: Op,
	    /// Operands of the instruction, in the order they were supplied.
	    pub opnds: Vec<Opnd>,
	}

	impl Insn {
	    /// Builds an instruction from an opcode and its operands.
	    fn new(op: Op, opnds: Vec<Opnd>) -> Self {
	        Self { op, opnds }
	    }
	}

	/// The assembler that is passed around the code generation functions.
	///
	/// For now it only holds a flat list of instructions; in the future it
	/// could also store live ranges, comments, additional metadata, etc.
	struct Assembler {
	    /// Instructions in the order they were pushed.
	    insns: Vec<Insn>,
	}

	impl Assembler {
	    /// Creates an assembler that holds no instructions.
	    fn new() -> Self {
	        Self { insns: Vec::new() }
	    }

	    /// Appends an `Op::Add` instruction built from `opnds`.
	    ///
	    /// `opnds` may be any type that knows how to convert itself into a
	    /// `Vec<Opnd>` through `IntoAddOpnds`, which restricts callers to a
	    /// known set of operand types. More can be introduced as they become
	    /// supported, and the list of implementations doubles as explicit
	    /// documentation of which types are allowed.
	    fn add<T>(&mut self, opnds: T)
	    where
	        T: IntoAddOpnds,
	    {
	        let insn = Insn::new(Op::Add, opnds.into_add_opnds());
	        self.insns.push(insn);
	    }
	}

	/// Checks that `Assembler::add` records an `Op::Add` instruction whose
	/// operands are the register and the immediate that were passed in.
	#[test]
	fn test_add() {
	    let mut asm = Assembler::new();

	    // This call compiles because (Reg, i64) implements IntoAddOpnds.
	    asm.add((Reg { idx: 0, num_bits: 8 }, 1));

	    // `matches!` is used so the checks do not require PartialEq or Debug
	    // on the IR types.
	    assert_eq!(asm.insns.len(), 1);
	    let insn = &asm.insns[0];
	    assert!(matches!(insn.op, Op::Add));
	    assert!(matches!(
	        insn.opnds.as_slice(),
	        [Opnd::Reg(Reg { idx: 0, num_bits: 8 }), Opnd::Imm(1)]
	    ));

	    // Negative example, kept as a comment because leaving it in would stop
	    // the whole test build from compiling: (Reg, u64) does not implement
	    // IntoAddOpnds, so the call below is rejected at compile time.
	    //
	    //     let opnd2: u64 = 2;
	    //     asm.add((Reg { idx: 0, num_bits: 8 }, opnd2));
	}