paxbun/float32.v

## float32.v
// Splits a raw 32-bit single-precision word into the fields used by the
// arithmetic units in this file.
//   in  : raw 32-bit float
//   sgn : copy of in[31]
//   exp : exponent field in[30:23] zero-extended to 9 bits; when the field is
//         all zeros, bit 0 is forced high so the reported exponent is 1
//   man : {hidden bit, in[22:0]}; the hidden bit is 1 iff the exponent field
//         is nonzero
module float32_splitter
(
	input	[31:0]	in,
	output			sgn,
	output	[8:0]	exp,
	output	[23:0]	man
);
	// High whenever at least one exponent bit is set.
	wire exp_nonzero = |in[30:23];

	assign sgn = in[31];
	assign man = {exp_nonzero, in[22:0]};
	// Bit 8 is constant 0; bit 0 is overridden to 1 for an all-zero exponent.
	assign exp = {1'b0, in[30:24], in[23] | ~exp_nonzero};
endmodule

// Adds two biased exponents and removes one bias: res = lhs + rhs - 127,
// computed modulo 2^9 with two chained 9-bit adders.
//   lhs, rhs : 9-bit biased exponents
//   res      : 9-bit result (wraps; no carry-out is exposed)
module float32_biased_exp_adder
(
	input	[8:0]	lhs,
	input	[8:0]	rhs,
	output	[8:0]	res
);
	// -127 expressed in 9-bit two's complement.
	localparam [8:0] NEG_BIAS = 9'b110000001;

	// lhs + rhs still carries the bias twice.
	wire [8:0] raw_sum;

	uint_add #(.WIDTH(9)) adder_doubly_biased (.lhs(lhs), .rhs(rhs), .res(raw_sum));
	uint_add #(.WIDTH(9)) adder (.lhs(raw_sum), .rhs(NEG_BIAS), .res(res));
endmodule

// Computes res = lhs - rhs modulo 2^9 by negating rhs (two's complement)
// and adding it to lhs.
//   lhs, rhs : 9-bit exponents
//   res      : 9-bit difference
module float32_unbiased_exp_subtractor
(
	input	[8:0]	lhs,
	input	[8:0]	rhs,
	output	[8:0]	res
);
	// Two's-complement negation of the subtrahend.
	wire [8:0] minus_rhs;

	uint_neg #(.WIDTH(9)) negator (.in(rhs), .res(minus_rhs));
	uint_add #(.WIDTH(9)) adder_unbiased (.lhs(lhs), .rhs(minus_rhs), .res(res));
endmodule

// Combinational single-precision multiplier. The mantissa product is
// truncated (no rounding).
//   lhs, rhs : raw 32-bit operands
//   res      : raw 32-bit product
// Special cases:
//   * NaN operand, or infinity times zero  -> quiet NaN (32'h7FC00000)
//   * infinity times any other value       -> infinity, sign = XOR of signs
//   * a zero operand                       -> signed zero
//   * exponent overflow                    -> signed infinity
//   * result exponent <= 0 (underflow)     -> flushed to signed zero
// NOTE(review): subnormal operands are multiplied without renormalising the
// product and subnormal results are never produced -- confirm this is
// acceptable for the intended use.
module float32_mul
(
	input		[31:0] lhs,
	input		[31:0] rhs,
	output	reg	[31:0] res
);
	wire sgn_lhs, sgn_rhs;
	wire [8:0] exp_lhs, exp_rhs;
	wire [8:0] exp_sum, exp_sum_plus_1;
	wire [23:0] man_lhs, man_rhs;
	wire [47:0] man_prod;

	float32_splitter lhs_splitter
	(
		.in(lhs),
		.sgn(sgn_lhs),
		.exp(exp_lhs),
		.man(man_lhs)
	);
	float32_splitter rhs_splitter
	(
		.in(rhs),
		.sgn(sgn_rhs),
		.exp(exp_rhs),
		.man(man_rhs)
	);
	// exp_sum = exp_lhs + exp_rhs - 127 (modulo 2^9).
	float32_biased_exp_adder exp_adder
	(
		.lhs(exp_lhs),
		.rhs(exp_rhs),
		.res(exp_sum)
	);
	// Exponent to use when the mantissa product carries into bit 47.
	uint_add #(.WIDTH(9)) exp_adder_plus_1
	(
		.lhs(exp_sum),
		.rhs(9'd1),
		.res(exp_sum_plus_1)
	);
	uint_mul #(.WIDTH(24)) man_multiplier
	(
		.lhs(man_lhs),
		.rhs(man_rhs),
		.res(man_prod)
	);

	// Operand classification. The splitter reports an all-ones exponent field
	// as 255 and clears the hidden bit when the exponent field is zero, so a
	// zero operand has an all-zero 24-bit mantissa.
	wire sgn_res = sgn_lhs ^ sgn_rhs;
	wire lhs_special = (exp_lhs == 9'd255);
	wire rhs_special = (exp_rhs == 9'd255);
	wire lhs_nan = lhs_special && (man_lhs[22:0] != 23'd0);
	wire rhs_nan = rhs_special && (man_rhs[22:0] != 23'd0);
	wire lhs_inf = lhs_special && (man_lhs[22:0] == 23'd0);
	wire rhs_inf = rhs_special && (man_rhs[22:0] == 23'd0);
	wire lhs_zero = (man_lhs == 24'd0);
	wire rhs_zero = (man_rhs == 24'd0);

	// Pick exponent and fraction according to where the product's leading
	// one landed (bit 47 or bit 46).
	wire [8:0] exp_sel = man_prod[47] ? exp_sum_plus_1 : exp_sum;
	wire [22:0] frac_sel = man_prod[47] ? man_prod[46:24] : man_prod[45:23];

	// For finite operands (exponents 1..254) the true exponent lies in
	// -125..382. In 9-bit two's complement the negative values map to
	// 387..511 (bits [8:7] == 2'b11) and overflowing values to 256..382
	// (bits [8:7] == 2'b10), so the two conditions can be told apart.
	wire exp_underflow = (exp_sel[8:7] == 2'b11) || (exp_sel == 9'd0);
	wire exp_overflow = exp_sel[8] || (exp_sel[7:0] == 8'hFF);

	always @(*) begin
		if (lhs_nan || rhs_nan || (lhs_inf && rhs_zero) || (rhs_inf && lhs_zero)) begin
			// NaN propagates; infinity times zero is invalid.
			res = 32'h7FC00000;
		end
		else if (lhs_inf || rhs_inf) begin
			res = {sgn_res, 8'hFF, 23'd0};
		end
		else if (man_prod == 48'd0) begin
			// At least one operand is zero: the result is an exact signed zero.
			res = {sgn_res, 31'd0};
		end
		else if (exp_underflow) begin
			// Too small for a normal number: flush to signed zero.
			res = {sgn_res, 31'd0};
		end
		else if (exp_overflow) begin
			// result is infinity
			res = {sgn_res, 8'hFF, 23'd0};
		end
		else begin
			res = {sgn_res, exp_sel[7:0], frac_sel};
		end
	end
endmodule

// Orders two floats by their exponent fields (bits [30:23]).
//   lhs, rhs : raw 32-bit floats
//   lhs_out  : rhs when rhs has the strictly larger exponent field, else lhs
//   rhs_out  : the other operand
// When both exponent fields are equal the operands pass through unswapped.
module float32_swap_by_exp
(
	input	[31:0]	lhs,
	input	[31:0]	rhs,
	output	[31:0]	lhs_out,
	output	[31:0]	rhs_out
);
	wire exp_equal, exp_comp;
	wire do_swap;

	uint_comp #(.WIDTH(8)) exp_comparator
	(
		.lhs(lhs[30:23]), .rhs(rhs[30:23]),
		.equal(exp_equal), .res(exp_comp)
	);

	// Swap only when the exponents differ and the comparator flags rhs.
	assign do_swap = exp_comp & ~exp_equal;

	uint_mux_2 #(.WIDTH(32)) lhs_selector (.in0(lhs), .in1(rhs), .sel(do_swap), .res(lhs_out));
	uint_mux_2 #(.WIDTH(32)) rhs_selector (.in0(rhs), .in1(lhs), .sel(do_swap), .res(rhs_out));
endmodule

// Finds the most significant set bit of a 25-bit magnitude and reports the
// exponent adjustment and left shift that go with it.
//   in         : 25-bit magnitude
//   exp_offset : (index of the highest set bit) - 23 as a 9-bit two's
//                complement value; -24 when in is all zeros
//   left_shift : 24 - (index of the highest set bit); 25 when in is all zeros
// Built as a chain of 2:1 muxes: stage k passes the result of stage k-1
// unless in[k] is set, so the highest set bit wins.
module float32_exp_bit_idx
(
	input	[24:0]	in,
	output	[8:0]	exp_offset,
	output	[4:0]	left_shift
);
	wire [8:0] mux_res_exp_offset[24:0];
	wire [4:0] mux_res_left_shift[24:0];
	genvar i;
	generate
		// Stage 0: choose between the "no bit set" defaults and bit 0.
		uint_mux_2 #(.WIDTH(9)) mux1_zero (
			.in0(9'b111101000), // -24
			.in1(9'b111101001), // -23
			.sel(in[0]),
			.res(mux_res_exp_offset[0])
		);
		uint_mux_2 #(.WIDTH(5)) mux2_zero (
			.in0(5'b11001), // 25
			.in1(5'b11000), // 24
			.sel(in[0]),
			.res(mux_res_left_shift[0])
		);
		// Stages 1..24. The loop must start at 1: element 0 is already driven
		// by the stage-0 muxes above, and a stage 0 here would read element -1
		// and put a second driver on element 0.
		for (i = 1; i < 25; i = i + 1) begin : stages
			localparam [8:0] i_minus_23 = i - 23;
			localparam [4:0] _24_minus_i = 24 - i;
			uint_mux_2 #(.WIDTH(9)) mux1 (
				.in0(mux_res_exp_offset[i - 1]), .in1(i_minus_23), .sel(in[i]),
				.res(mux_res_exp_offset[i])
			);
			uint_mux_2 #(.WIDTH(5)) mux2 (
				.in0(mux_res_left_shift[i - 1]), .in1(_24_minus_i), .sel(in[i]),
				.res(mux_res_left_shift[i])
			);
		end
	endgenerate
	assign exp_offset = mux_res_exp_offset[24];
	assign left_shift = mux_res_left_shift[24];
endmodule

// Combinational single-precision adder (truncating, no rounding).
//   lhs, rhs : raw 32-bit operands
//   res      : raw 32-bit sum
// Data path: order operands by exponent, convert both mantissas to 26-bit
// two's complement, align the smaller one with an arithmetic right shift,
// add, take the magnitude, then renormalise.
// NaN and infinity operands are not handled.
// NOTE(review): results whose adjusted exponent is zero or negative take a
// simplified path (exponent forced to 0, unshifted low mantissa bits) --
// confirm whether exact subnormal results are required.
module float32_add
(
	input		[31:0]	lhs,
	input		[31:0]	rhs,
	output		[31:0]	res
);
	// After the swap, the "lhs" path holds the operand whose exponent field
	// is at least as large as the other's.
	wire [31:0] lhs_swapped, rhs_swapped;
	float32_swap_by_exp swapper
	(
		.lhs(lhs), .rhs(rhs),
		.lhs_out(lhs_swapped), .rhs_out(rhs_swapped)
	);

	wire sgn_lhs, sgn_rhs;
	wire [8:0] exp_lhs, exp_rhs;
	wire [23:0] man_lhs, man_rhs;
	float32_splitter lhs_splitter (.in(lhs_swapped), .sgn(sgn_lhs), .exp(exp_lhs), .man(man_lhs));
	float32_splitter rhs_splitter (.in(rhs_swapped), .sgn(sgn_rhs), .exp(exp_rhs), .man(man_rhs));

	// Widen to 26 bits: room for a carry out of the addition plus a sign bit.
	wire [25:0] man_lhs_ex, man_rhs_ex;
	assign man_lhs_ex = {2'b00, man_lhs};
	assign man_rhs_ex = {2'b00, man_rhs};

	// Signed mantissas: negate when the operand's sign bit is set.
	wire [25:0] man_lhs_ex_neg, man_rhs_ex_neg;
	uint_neg #(.WIDTH(26)) man_lhs_ex_negator (.in(man_lhs_ex), .res(man_lhs_ex_neg));
	uint_neg #(.WIDTH(26)) man_rhs_ex_negator (.in(man_rhs_ex), .res(man_rhs_ex_neg));

	wire [25:0] man_lhs_fin, man_rhs_fin;
	uint_mux_2 #(.WIDTH(26)) man_lhs_mux
	(
		.in0(man_lhs_ex), .in1(man_lhs_ex_neg),
		.sel(sgn_lhs), .res(man_lhs_fin)
	);
	uint_mux_2 #(.WIDTH(26)) man_rhs_mux
	(
		.in0(man_rhs_ex), .in1(man_rhs_ex_neg),
		.sel(sgn_rhs), .res(man_rhs_fin)
	);

	// Align the smaller operand: arithmetic right shift by the exponent gap.
	wire [8:0] exp_diff;
	float32_unbiased_exp_subtractor exp_subtractor (.lhs(exp_lhs), .rhs(exp_rhs), .res(exp_diff));

	wire [25:0] man_rhs_fin_shifted;
	uint_sr #(.WIDTH(26), .SHIFT(9)) man_rhs_shifter
	(
		.in(man_rhs_fin),
		.shift(exp_diff),
		.logical(1'b0),
		.res(man_rhs_fin_shifted)
	);

	// Signed sum and its magnitude; man_sum[25] is the sign of the result.
	wire [25:0] man_sum, man_sum_neg, man_sum_fin;
	uint_add #(.WIDTH(26)) man_adder (.lhs(man_lhs_fin), .rhs(man_rhs_fin_shifted), .res(man_sum));
	uint_neg #(.WIDTH(26)) man_sum_negator (.in(man_sum), .res(man_sum_neg));
	uint_mux_2 #(.WIDTH(26)) man_sum_mux
	(
		.in0(man_sum), .in1(man_sum_neg),
		.sel(man_sum[25]), .res(man_sum_fin)
	);

	// Renormalise: locate the leading one, adjust the exponent, shift left.
	wire [8:0] exp_offset, exp_offset_added, exp_fin;
	wire [4:0] left_shift;
	float32_exp_bit_idx exp_bit_idx_calc
	(
		.in(man_sum_fin[24:0]),
		.exp_offset(exp_offset),
		.left_shift(left_shift)
	);
	uint_add #(.WIDTH(9)) exp_offset_adder (.lhs(exp_lhs), .rhs(exp_offset), .res(exp_offset_added));

	wire [24:0] man_sum_shifted;
	wire [22:0] man_sum_shifted_fin;
	uint_sl #(.WIDTH(25), .SHIFT(5)) man_sum_shifter
	(
		.in(man_sum_fin[24:0]),
		.shift(left_shift),
		.res(man_sum_shifted)
	);
	// When bit 8 of the adjusted exponent is set, fall back to the unshifted
	// low mantissa bits instead of the normalised fraction.
	uint_mux_2 #(.WIDTH(23)) man_sum_shifted_mux
	(
		.in0(man_sum_shifted[23:1]),
		.in1(man_sum_fin[22:0]),
		.sel(exp_offset_added[8]),
		.res(man_sum_shifted_fin)
	);

	// The sum is exactly zero only when the whole magnitude is zero. Testing
	// just the 23 fraction bits would also match every nonzero result with a
	// zero fraction (e.g. 1.0 + 1.0 = 2.0) and wrongly clear its exponent.
	wire sum_is_zero = ~(|man_sum_fin);

	uint_mux_2 #(.WIDTH(9)) exp_fin_mux
	(
		.in0(exp_offset_added),
		.in1(9'b0),
		.sel(exp_offset_added[8] | sum_is_zero),
		.res(exp_fin)
	);

	// TODO: handle NaN and infinity
	assign res = {man_sum[25], exp_fin[7:0], man_sum_shifted_fin};
endmodule

## tb_float32.v
// Smoke testbench: drives float32_mul and float32_add with five operand
// pairs and prints the simulator's own shortreal result ("desired") next to
// the hardware result ("actual") for each operation.
module tb_float32;
	reg [31:0] lhs, rhs;
	wire [31:0] prod, sum;
	float32_mul multiplier (.lhs(lhs), .rhs(rhs), .res(prod));
	float32_add adder (.lhs(lhs), .rhs(rhs), .res(sum));

	// Applies one operand pair, waits 5 time units for the combinational
	// logic to settle, prints the four report lines, then waits 5 more.
	task run_case;
		input [31:0] a;
		input [31:0] b;
		begin
			lhs = a;
			rhs = b;
			#5;
			$display("desired: %f * %f = %f", $bitstoshortreal(lhs), $bitstoshortreal(rhs), $bitstoshortreal(lhs) * $bitstoshortreal(rhs));
			$display("actual:  %f * %f = %f", $bitstoshortreal(lhs), $bitstoshortreal(rhs), $bitstoshortreal(prod));
			$display("desired: %f + %f = %f", $bitstoshortreal(lhs), $bitstoshortreal(rhs), $bitstoshortreal(lhs) + $bitstoshortreal(rhs));
			$display("actual:  %f + %f = %f", $bitstoshortreal(lhs), $bitstoshortreal(rhs), $bitstoshortreal(sum));
			#5;
		end
	endtask

	initial begin
		run_case(32'h3FC94120, 32'h455AA314);
		run_case(32'hC6C23242, 32'h45B7591A);
		run_case(32'h48E07967, 32'h455AA314);
		run_case(32'h48E07967, 32'h0);
		run_case(32'h48E07967, 32'hC8E07967);
	end
endmodule

## uint.v
// One-bit full adder.
//   lhs, rhs  : addend bits
//   carry     : carry in
//   res       : sum bit
//   carry_out : carry out
module uint_bit_add
(
	input lhs,
	input rhs,
	input carry,
	output res,
	output carry_out
);
	// Half-sum of the two operand bits, shared by both outputs.
	wire half_sum = lhs ^ rhs;

	assign res = half_sum ^ carry;
	// Carry is produced when both operands are set, or propagated when
	// exactly one is set and a carry comes in.
	assign carry_out = (half_sum & carry) | (lhs & rhs);
endmodule

// WIDTH-bit ripple-carry adder built from uint_bit_add cells.
//   lhs, rhs : operands
//   res      : lhs + rhs modulo 2^WIDTH (the final carry is not exposed)
module uint_add # (parameter WIDTH = 8)
(
	input [(WIDTH - 1):0] lhs,
	input [(WIDTH - 1):0] rhs,
	output [(WIDTH - 1):0] res
);
	// ripple[k] is the carry into bit k; ripple[WIDTH] is left unconnected.
	wire [WIDTH:0] ripple;
	assign ripple[0] = 0;

	genvar k;
	generate
		for (k = 0; k < WIDTH; k = k + 1) begin : adders
			uint_bit_add adder
			(
				.lhs(lhs[k]), .rhs(rhs[k]), .carry(ripple[k]),
				.res(res[k]), .carry_out(ripple[k + 1])
			);
		end
	endgenerate
endmodule

// Multiplies a (2*WIDTH)-bit vector by a single bit: every bit of lhs is
// ANDed with rhs.
//   lhs : (2*WIDTH)-bit operand
//   rhs : one-bit multiplier
//   res : lhs when rhs is 1, all zeros when rhs is 0
module uint_bit_mul # (parameter WIDTH = 8)
(
	input [(WIDTH * 2 - 1):0] lhs,
	input rhs,
	output [(WIDTH * 2 - 1):0] res
);
	// Replicate the multiplier bit across the full width and mask lhs with it.
	assign res = lhs & {(WIDTH * 2){rhs}};
endmodule

// Unsigned WIDTH x WIDTH array multiplier (shift-and-add).
//   lhs, rhs : WIDTH-bit operands
//   res      : full (2*WIDTH)-bit product
// Stage k adds (lhs << k), gated by rhs[k], to the running total.
module uint_mul # (parameter WIDTH = 8)
(
	input  [(WIDTH - 1):0] lhs,
	input  [(WIDTH - 1):0] rhs,
	output [(WIDTH * 2 - 1):0] res
);
	wire [(WIDTH * 2 - 1):0] shifted [(WIDTH - 1):0];	// lhs << k, zero-extended
	wire [(WIDTH * 2 - 1):0] partial [(WIDTH - 1):0];	// shifted[k] gated by rhs[k]
	wire [(WIDTH * 2 - 1):0] acc [WIDTH:0];				// running total
	assign acc[0] = {(WIDTH * 2){1'b0}};

	genvar k;
	generate
		for (k = 0; k < WIDTH; k = k + 1) begin : multipliers
			assign shifted[k] = {{WIDTH{1'b0}}, lhs} << k;

			uint_bit_mul #(.WIDTH(WIDTH)) multiplier
			(
				.lhs(shifted[k]), .rhs(rhs[k]), .res(partial[k])
			);
			uint_add #(.WIDTH(WIDTH * 2)) adder
			(
				.lhs(partial[k]), .rhs(acc[k]), .res(acc[k + 1])
			);
		end
	endgenerate

	assign res = acc[WIDTH];
endmodule

// WIDTH-bit two-input multiplexer.
//   in0 : value forwarded when sel is 0
//   in1 : value forwarded when sel is 1
//   sel : select input
//   res : selected value
module uint_mux_2 # (parameter WIDTH = 8)
(
	input	[(WIDTH - 1):0] in0,
	input	[(WIDTH - 1):0] in1,
	input	sel,
	output	[(WIDTH - 1):0] res
);
	// Written with the inverted select; the conditional operator is kept so
	// that an unknown select behaves as in a plain "sel ? in1 : in0".
	assign res = (~sel) ? in0 : in1;
endmodule

// One-bit comparison cell used by uint_comp.
//   lhs, rhs : bits to compare
//   equal    : 1 when lhs and rhs match
//   res      : copy of rhs (when the bits differ, 1 therefore means lhs < rhs)
module uint_bit_comp
(
	input	lhs,
	input	rhs,
	output	equal,
	output	res
);
	assign res = rhs;
	assign equal = lhs ~^ rhs;	// XNOR
endmodule

// WIDTH-bit unsigned comparator.
//   lhs, rhs : values to compare
//   equal    : 1 when every bit of lhs matches rhs
//   res      : the rhs bit at the most significant position where the inputs
//              differ (1 means lhs < rhs); when the inputs are equal it is
//              rhs[0]
module uint_comp # (parameter WIDTH = 8)
(
	input	[(WIDTH - 1):0] lhs,
	input	[(WIDTH - 1):0] rhs,
	output	equal,
	output	res
);
	wire [(WIDTH - 1):0] bit_eq;	// per-bit equality
	wire [(WIDTH - 1):0] eq_chain;	// bits k..0 all equal
	wire [(WIDTH - 1):0] bit_cmp;	// per-bit comparison result
	wire [(WIDTH - 1):0] cmp_chain;	// comparison decided by bits k..0

	genvar k;
	generate
		for (k = 0; k < WIDTH; k = k + 1) begin : comparators
			uint_bit_comp bit_comp
			(
				.lhs(lhs[k]), .rhs(rhs[k]),
				.equal(bit_eq[k]), .res(bit_cmp[k])
			);
			if (k != 0) begin
				// A higher bit overrides the lower-order verdict unless it matches.
				uint_mux_2 #(.WIDTH(1)) bit_mux
				(
					.in0(bit_cmp[k]), .in1(cmp_chain[k - 1]),
					.sel(bit_eq[k]), .res(cmp_chain[k])
				);
				assign eq_chain[k] = bit_eq[k] & eq_chain[k - 1];
			end
			else begin
				assign eq_chain[k] = bit_eq[k];
				assign cmp_chain[k] = bit_cmp[k];
			end
		end
	endgenerate

	assign equal = eq_chain[WIDTH - 1];
	assign res = cmp_chain[WIDTH - 1];
endmodule

// Right shift by the compile-time constant SHIFT.
//   in      : value to shift
//   logical : 1 fills vacated bits with 0; 0 fills them with in[WIDTH-1]
//   res     : shifted value
// SHIFT <= 0 passes the input through; SHIFT >= WIDTH yields only fill bits.
module uint_sr_const # (parameter WIDTH = 8, parameter SHIFT = 1)
(
	input	[(WIDTH - 1):0] in,
	input	logical,
	output	[(WIDTH - 1):0] res
);
	// Bit shifted in from the left.
	wire fill = (~logical) & in[WIDTH - 1];

	generate
		if (SHIFT <= 0) begin
			assign res = in;
		end
		else if (SHIFT < WIDTH) begin
			assign res = {{SHIFT{fill}}, in[(WIDTH - 1):SHIFT]};
		end
		else begin
			assign res = {WIDTH{fill}};
		end
	endgenerate
endmodule

// Variable right shifter (barrel style). Stage k shifts by 2^k when
// shift[k] is set, so the total shift equals the value of `shift`.
//   in      : value to shift
//   shift   : SHIFT-bit shift amount
//   logical : forwarded to every uint_sr_const stage (1 = zero fill,
//             0 = fill with the stage input's top bit)
//   res     : shifted value
module uint_sr # (parameter WIDTH = 8, parameter SHIFT = 3)
(
	input	[(WIDTH - 1):0] in,
	input	[(SHIFT - 1):0] shift,
	input	logical,
	output	[(WIDTH - 1):0] res
);
	wire [(WIDTH - 1):0] stage[SHIFT:0];	// stage[k]: value entering stage k
	wire [(WIDTH - 1):0] moved[SHIFT:0];	// moved[k]: stage[k] shifted by 2^k
	genvar k;

	assign stage[0] = in;
	assign res = stage[SHIFT];

	generate
		for (k = 0; k < SHIFT; k = k + 1) begin : shifters
			uint_sr_const #(.WIDTH(WIDTH), .SHIFT(1 << k)) shifter
			(
				.in(stage[k]), .logical(logical), .res(moved[k])
			);
			uint_mux_2 #(.WIDTH(WIDTH)) mux
			(
				.in0(stage[k]), .in1(moved[k]),
				.sel(shift[k]), .res(stage[k + 1])
			);
		end
	endgenerate
endmodule

// Left shift by the compile-time constant SHIFT, filling with zeros.
//   in  : value to shift
//   res : shifted value
// SHIFT <= 0 passes the input through; SHIFT >= WIDTH yields all zeros.
module uint_sl_const # (parameter WIDTH = 8, parameter SHIFT = 1)
(
	input	[(WIDTH - 1):0] in,
	output	[(WIDTH - 1):0] res
);
	generate
		if (SHIFT <= 0) begin
			assign res = in;
		end
		else if (SHIFT < WIDTH) begin
			// Keep the low WIDTH-SHIFT bits and append SHIFT zeros below them.
			assign res = {in[(WIDTH - SHIFT - 1):0], {SHIFT{1'b0}}};
		end
		else begin
			assign res = 0;
		end
	endgenerate
endmodule

// Variable left shifter (barrel style). Stage k shifts by 2^k when shift[k]
// is set, so the total shift equals the value of `shift`.
//   in    : value to shift
//   shift : SHIFT-bit shift amount
//   res   : shifted value (vacated bits are zero)
module uint_sl # (parameter WIDTH = 8, parameter SHIFT = 3)
(
	input	[(WIDTH - 1):0] in,
	input	[(SHIFT - 1):0] shift,
	output	[(WIDTH - 1):0] res
);
	wire [(WIDTH - 1):0] stage[SHIFT:0];	// stage[k]: value entering stage k
	wire [(WIDTH - 1):0] moved[SHIFT:0];	// moved[k]: stage[k] shifted by 2^k
	genvar k;

	assign stage[0] = in;
	assign res = stage[SHIFT];

	generate
		for (k = 0; k < SHIFT; k = k + 1) begin : shifters
			uint_sl_const #(.WIDTH(WIDTH), .SHIFT(1 << k)) shifter
			(
				.in(stage[k]), .res(moved[k])
			);
			uint_mux_2 #(.WIDTH(WIDTH)) mux
			(
				.in0(stage[k]), .in1(moved[k]),
				.sel(shift[k]), .res(stage[k + 1])
			);
		end
	endgenerate
endmodule

// Two's-complement negation: res = (~in) + 1 modulo 2^WIDTH.
//   in  : value to negate
//   res : negated value
module uint_neg # (parameter WIDTH = 8)
(
	input	[(WIDTH - 1):0] in,
	output	[(WIDTH - 1):0] res
);
	parameter [(WIDTH - 1):0] one = 1;

	// Bitwise complement of the input, fed to the incrementing adder.
	wire [(WIDTH - 1):0] inverted;
	assign inverted = ~in;

	uint_add #(.WIDTH(WIDTH)) adder (.lhs(inverted), .rhs(one), .res(res));
endmodule
	// Splits a raw 32-bit single-precision word into the fields used by the
	// arithmetic units in this file.
	//   in  : raw 32-bit float
	//   sgn : copy of in[31]
	//   exp : exponent field in[30:23] zero-extended to 9 bits; when the field
	//         is all zeros, bit 0 is forced high so the reported exponent is 1
	//   man : {hidden bit, in[22:0]}; the hidden bit is 1 iff the exponent
	//         field is nonzero
	module float32_splitter
	(
		input	[31:0]	in,
		output			sgn,
		output	[8:0]	exp,
		output	[23:0]	man
	);
		// High whenever at least one exponent bit is set. (The previous text
		// used a backslash-escaped OR, which is not a Verilog operator.)
		wire exp_nonzero = |in[30:23];

		assign sgn = in[31];
		assign man = {exp_nonzero, in[22:0]};
		// Bit 8 is constant 0; bit 0 is overridden to 1 for an all-zero exponent.
		assign exp = {1'b0, in[30:24], in[23] | ~exp_nonzero};
	endmodule

	// Adds two biased exponents and removes one bias: res = lhs + rhs - 127,
	// computed modulo 2^9 with two chained 9-bit adders.
	//   lhs, rhs : 9-bit biased exponents
	//   res      : 9-bit result (wraps; no carry-out is exposed)
	module float32_biased_exp_adder
	(
		input	[8:0]	lhs,
		input	[8:0]	rhs,
		output	[8:0]	res
	);
		// -127 expressed in 9-bit two's complement.
		localparam [8:0] NEG_BIAS = 9'b110000001;

		// lhs + rhs still carries the bias twice.
		wire [8:0] raw_sum;

		uint_add #(.WIDTH(9)) adder_doubly_biased (.lhs(lhs), .rhs(rhs), .res(raw_sum));
		uint_add #(.WIDTH(9)) adder (.lhs(raw_sum), .rhs(NEG_BIAS), .res(res));
	endmodule

	// Computes res = lhs - rhs modulo 2^9 by negating rhs (two's complement)
	// and adding it to lhs.
	//   lhs, rhs : 9-bit exponents
	//   res      : 9-bit difference
	module float32_unbiased_exp_subtractor
	(
		input	[8:0]	lhs,
		input	[8:0]	rhs,
		output	[8:0]	res
	);
		// Two's-complement negation of the subtrahend.
		wire [8:0] minus_rhs;

		uint_neg #(.WIDTH(9)) negator (.in(rhs), .res(minus_rhs));
		uint_add #(.WIDTH(9)) adder_unbiased (.lhs(lhs), .rhs(minus_rhs), .res(res));
	endmodule

	// Combinational single-precision multiplier. The mantissa product is
	// truncated (no rounding).
	//   lhs, rhs : raw 32-bit operands
	//   res      : raw 32-bit product
	// Special cases:
	//   * NaN operand, or infinity times zero  -> quiet NaN (32'h7FC00000)
	//   * infinity times any other value       -> infinity, sign = XOR of signs
	//   * a zero operand                       -> signed zero
	//   * exponent overflow                    -> signed infinity
	//   * result exponent <= 0 (underflow)     -> flushed to signed zero
	// NOTE(review): subnormal operands are multiplied without renormalising
	// the product and subnormal results are never produced -- confirm this is
	// acceptable for the intended use.
	module float32_mul
	(
		input		[31:0] lhs,
		input		[31:0] rhs,
		output	reg	[31:0] res
	);
		wire sgn_lhs, sgn_rhs;
		wire [8:0] exp_lhs, exp_rhs;
		wire [8:0] exp_sum, exp_sum_plus_1;
		wire [23:0] man_lhs, man_rhs;
		wire [47:0] man_prod;

		float32_splitter lhs_splitter
		(
			.in(lhs),
			.sgn(sgn_lhs),
			.exp(exp_lhs),
			.man(man_lhs)
		);
		float32_splitter rhs_splitter
		(
			.in(rhs),
			.sgn(sgn_rhs),
			.exp(exp_rhs),
			.man(man_rhs)
		);
		// exp_sum = exp_lhs + exp_rhs - 127 (modulo 2^9).
		float32_biased_exp_adder exp_adder
		(
			.lhs(exp_lhs),
			.rhs(exp_rhs),
			.res(exp_sum)
		);
		// Exponent to use when the mantissa product carries into bit 47.
		uint_add #(.WIDTH(9)) exp_adder_plus_1
		(
			.lhs(exp_sum),
			.rhs(9'd1),
			.res(exp_sum_plus_1)
		);
		uint_mul #(.WIDTH(24)) man_multiplier
		(
			.lhs(man_lhs),
			.rhs(man_rhs),
			.res(man_prod)
		);

		// Operand classification. The splitter reports an all-ones exponent
		// field as 255 and clears the hidden bit when the exponent field is
		// zero, so a zero operand has an all-zero 24-bit mantissa.
		wire sgn_res = sgn_lhs ^ sgn_rhs;
		wire lhs_special = (exp_lhs == 9'd255);
		wire rhs_special = (exp_rhs == 9'd255);
		wire lhs_nan = lhs_special && (man_lhs[22:0] != 23'd0);
		wire rhs_nan = rhs_special && (man_rhs[22:0] != 23'd0);
		wire lhs_inf = lhs_special && (man_lhs[22:0] == 23'd0);
		wire rhs_inf = rhs_special && (man_rhs[22:0] == 23'd0);
		wire lhs_zero = (man_lhs == 24'd0);
		wire rhs_zero = (man_rhs == 24'd0);

		// Pick exponent and fraction according to where the product's leading
		// one landed (bit 47 or bit 46).
		wire [8:0] exp_sel = man_prod[47] ? exp_sum_plus_1 : exp_sum;
		wire [22:0] frac_sel = man_prod[47] ? man_prod[46:24] : man_prod[45:23];

		// For finite operands (exponents 1..254) the true exponent lies in
		// -125..382. In 9-bit two's complement the negative values map to
		// 387..511 (bits [8:7] == 2'b11) and overflowing values to 256..382
		// (bits [8:7] == 2'b10), so the two conditions can be told apart.
		wire exp_underflow = (exp_sel[8:7] == 2'b11) || (exp_sel == 9'd0);
		wire exp_overflow = exp_sel[8] || (exp_sel[7:0] == 8'hFF);

		always @(*) begin
			if (lhs_nan || rhs_nan || (lhs_inf && rhs_zero) || (rhs_inf && lhs_zero)) begin
				// NaN propagates; infinity times zero is invalid.
				res = 32'h7FC00000;
			end
			else if (lhs_inf || rhs_inf) begin
				res = {sgn_res, 8'hFF, 23'd0};
			end
			else if (man_prod == 48'd0) begin
				// At least one operand is zero: exact signed zero.
				res = {sgn_res, 31'd0};
			end
			else if (exp_underflow) begin
				// Too small for a normal number: flush to signed zero.
				res = {sgn_res, 31'd0};
			end
			else if (exp_overflow) begin
				// result is infinity
				res = {sgn_res, 8'hFF, 23'd0};
			end
			else begin
				res = {sgn_res, exp_sel[7:0], frac_sel};
			end
		end
	endmodule

	// Orders two floats by their exponent fields (bits [30:23]).
	//   lhs, rhs : raw 32-bit floats
	//   lhs_out  : rhs when rhs has the strictly larger exponent field, else lhs
	//   rhs_out  : the other operand
	// When both exponent fields are equal the operands pass through unswapped.
	module float32_swap_by_exp
	(
		input	[31:0]	lhs,
		input	[31:0]	rhs,
		output	[31:0]	lhs_out,
		output	[31:0]	rhs_out
	);
		wire exp_equal, exp_comp;
		wire do_swap;

		uint_comp #(.WIDTH(8)) exp_comparator
		(
			.lhs(lhs[30:23]), .rhs(rhs[30:23]),
			.equal(exp_equal), .res(exp_comp)
		);

		// Swap only when the exponents differ and the comparator flags rhs.
		assign do_swap = exp_comp & ~exp_equal;

		uint_mux_2 #(.WIDTH(32)) lhs_selector (.in0(lhs), .in1(rhs), .sel(do_swap), .res(lhs_out));
		uint_mux_2 #(.WIDTH(32)) rhs_selector (.in0(rhs), .in1(lhs), .sel(do_swap), .res(rhs_out));
	endmodule

	// Finds the most significant set bit of a 25-bit magnitude and reports the
	// exponent adjustment and left shift that go with it.
	//   in         : 25-bit magnitude
	//   exp_offset : (index of the highest set bit) - 23 as a 9-bit two's
	//                complement value; -24 when in is all zeros
	//   left_shift : 24 - (index of the highest set bit); 25 when in is all zeros
	// Built as a chain of 2:1 muxes: stage k passes the result of stage k-1
	// unless in[k] is set, so the highest set bit wins.
	module float32_exp_bit_idx
	(
		input	[24:0]	in,
		output	[8:0]	exp_offset,
		output	[4:0]	left_shift
	);
		wire [8:0] mux_res_exp_offset[24:0];
		wire [4:0] mux_res_left_shift[24:0];
		genvar i;
		generate
			// Stage 0: choose between the "no bit set" defaults and bit 0.
			uint_mux_2 #(.WIDTH(9)) mux1_zero (
				.in0(9'b111101000), // -24
				.in1(9'b111101001), // -23
				.sel(in[0]),
				.res(mux_res_exp_offset[0])
			);
			uint_mux_2 #(.WIDTH(5)) mux2_zero (
				.in0(5'b11001), // 25
				.in1(5'b11000), // 24
				.sel(in[0]),
				.res(mux_res_left_shift[0])
			);
			// Stages 1..24. The loop must start at 1: element 0 is already
			// driven by the stage-0 muxes above, and a stage 0 here would read
			// element -1 and put a second driver on element 0.
			for (i = 1; i < 25; i = i + 1) begin : stages
				localparam [8:0] i_minus_23 = i - 23;
				localparam [4:0] _24_minus_i = 24 - i;
				uint_mux_2 #(.WIDTH(9)) mux1 (
					.in0(mux_res_exp_offset[i - 1]), .in1(i_minus_23), .sel(in[i]),
					.res(mux_res_exp_offset[i])
				);
				uint_mux_2 #(.WIDTH(5)) mux2 (
					.in0(mux_res_left_shift[i - 1]), .in1(_24_minus_i), .sel(in[i]),
					.res(mux_res_left_shift[i])
				);
			end
		endgenerate
		assign exp_offset = mux_res_exp_offset[24];
		assign left_shift = mux_res_left_shift[24];
	endmodule

	// Combinational single-precision adder (truncating, no rounding).
	//   lhs, rhs : raw 32-bit operands
	//   res      : raw 32-bit sum
	// Data path: order operands by exponent, convert both mantissas to 26-bit
	// two's complement, align the smaller one with an arithmetic right shift,
	// add, take the magnitude, then renormalise.
	// NaN and infinity operands are not handled.
	// NOTE(review): results whose adjusted exponent is zero or negative take a
	// simplified path (exponent forced to 0, unshifted low mantissa bits) --
	// confirm whether exact subnormal results are required.
	module float32_add
	(
		input		[31:0]	lhs,
		input		[31:0]	rhs,
		output		[31:0]	res
	);
		// After the swap, the "lhs" path holds the operand whose exponent
		// field is at least as large as the other's.
		wire [31:0] lhs_swapped, rhs_swapped;
		float32_swap_by_exp swapper
		(
			.lhs(lhs), .rhs(rhs),
			.lhs_out(lhs_swapped), .rhs_out(rhs_swapped)
		);

		wire sgn_lhs, sgn_rhs;
		wire [8:0] exp_lhs, exp_rhs;
		wire [23:0] man_lhs, man_rhs;
		float32_splitter lhs_splitter (.in(lhs_swapped), .sgn(sgn_lhs), .exp(exp_lhs), .man(man_lhs));
		float32_splitter rhs_splitter (.in(rhs_swapped), .sgn(sgn_rhs), .exp(exp_rhs), .man(man_rhs));

		// Widen to 26 bits: room for a carry out of the addition plus a sign bit.
		wire [25:0] man_lhs_ex, man_rhs_ex;
		assign man_lhs_ex = {2'b00, man_lhs};
		assign man_rhs_ex = {2'b00, man_rhs};

		// Signed mantissas: negate when the operand's sign bit is set.
		wire [25:0] man_lhs_ex_neg, man_rhs_ex_neg;
		uint_neg #(.WIDTH(26)) man_lhs_ex_negator (.in(man_lhs_ex), .res(man_lhs_ex_neg));
		uint_neg #(.WIDTH(26)) man_rhs_ex_negator (.in(man_rhs_ex), .res(man_rhs_ex_neg));

		wire [25:0] man_lhs_fin, man_rhs_fin;
		uint_mux_2 #(.WIDTH(26)) man_lhs_mux
		(
			.in0(man_lhs_ex), .in1(man_lhs_ex_neg),
			.sel(sgn_lhs), .res(man_lhs_fin)
		);
		uint_mux_2 #(.WIDTH(26)) man_rhs_mux
		(
			.in0(man_rhs_ex), .in1(man_rhs_ex_neg),
			.sel(sgn_rhs), .res(man_rhs_fin)
		);

		// Align the smaller operand: arithmetic right shift by the exponent gap.
		wire [8:0] exp_diff;
		float32_unbiased_exp_subtractor exp_subtractor (.lhs(exp_lhs), .rhs(exp_rhs), .res(exp_diff));

		wire [25:0] man_rhs_fin_shifted;
		uint_sr #(.WIDTH(26), .SHIFT(9)) man_rhs_shifter
		(
			.in(man_rhs_fin),
			.shift(exp_diff),
			.logical(1'b0),
			.res(man_rhs_fin_shifted)
		);

		// Signed sum and its magnitude; man_sum[25] is the sign of the result.
		wire [25:0] man_sum, man_sum_neg, man_sum_fin;
		uint_add #(.WIDTH(26)) man_adder (.lhs(man_lhs_fin), .rhs(man_rhs_fin_shifted), .res(man_sum));
		uint_neg #(.WIDTH(26)) man_sum_negator (.in(man_sum), .res(man_sum_neg));
		uint_mux_2 #(.WIDTH(26)) man_sum_mux
		(
			.in0(man_sum), .in1(man_sum_neg),
			.sel(man_sum[25]), .res(man_sum_fin)
		);

		// Renormalise: locate the leading one, adjust the exponent, shift left.
		wire [8:0] exp_offset, exp_offset_added, exp_fin;
		wire [4:0] left_shift;
		float32_exp_bit_idx exp_bit_idx_calc
		(
			.in(man_sum_fin[24:0]),
			.exp_offset(exp_offset),
			.left_shift(left_shift)
		);
		uint_add #(.WIDTH(9)) exp_offset_adder (.lhs(exp_lhs), .rhs(exp_offset), .res(exp_offset_added));

		wire [24:0] man_sum_shifted;
		wire [22:0] man_sum_shifted_fin;
		uint_sl #(.WIDTH(25), .SHIFT(5)) man_sum_shifter
		(
			.in(man_sum_fin[24:0]),
			.shift(left_shift),
			.res(man_sum_shifted)
		);
		// When bit 8 of the adjusted exponent is set, fall back to the
		// unshifted low mantissa bits instead of the normalised fraction.
		uint_mux_2 #(.WIDTH(23)) man_sum_shifted_mux
		(
			.in0(man_sum_shifted[23:1]),
			.in1(man_sum_fin[22:0]),
			.sel(exp_offset_added[8]),
			.res(man_sum_shifted_fin)
		);

		// The sum is exactly zero only when the whole magnitude is zero.
		// Testing just the 23 fraction bits would also match every nonzero
		// result with a zero fraction (e.g. 1.0 + 1.0 = 2.0) and wrongly clear
		// its exponent.
		wire sum_is_zero = ~(|man_sum_fin);

		uint_mux_2 #(.WIDTH(9)) exp_fin_mux
		(
			.in0(exp_offset_added),
			.in1(9'b0),
			.sel(exp_offset_added[8] | sum_is_zero),
			.res(exp_fin)
		);

		// TODO: handle NaN and infinity
		assign res = {man_sum[25], exp_fin[7:0], man_sum_shifted_fin};
	endmodule
	// Smoke testbench: drives float32_mul and float32_add with five operand
	// pairs and prints the simulator's own shortreal result ("desired") next
	// to the hardware result ("actual") for each operation.
	module tb_float32;
		reg [31:0] lhs, rhs;
		wire [31:0] prod, sum;
		float32_mul multiplier (.lhs(lhs), .rhs(rhs), .res(prod));
		float32_add adder (.lhs(lhs), .rhs(rhs), .res(sum));

		// Applies one operand pair, waits 5 time units for the combinational
		// logic to settle, prints the four report lines, then waits 5 more.
		task run_case;
			input [31:0] a;
			input [31:0] b;
			begin
				lhs = a;
				rhs = b;
				#5;
				$display("desired: %f * %f = %f", $bitstoshortreal(lhs), $bitstoshortreal(rhs), $bitstoshortreal(lhs) * $bitstoshortreal(rhs));
				$display("actual: %f * %f = %f", $bitstoshortreal(lhs), $bitstoshortreal(rhs), $bitstoshortreal(prod));
				$display("desired: %f + %f = %f", $bitstoshortreal(lhs), $bitstoshortreal(rhs), $bitstoshortreal(lhs) + $bitstoshortreal(rhs));
				$display("actual: %f + %f = %f", $bitstoshortreal(lhs), $bitstoshortreal(rhs), $bitstoshortreal(sum));
				#5;
			end
		endtask

		initial begin
			run_case(32'h3FC94120, 32'h455AA314);
			run_case(32'hC6C23242, 32'h45B7591A);
			run_case(32'h48E07967, 32'h455AA314);
			run_case(32'h48E07967, 32'h0);
			run_case(32'h48E07967, 32'hC8E07967);
		end
	endmodule
	// One-bit full adder.
	//   lhs, rhs  : addend bits
	//   carry     : carry in
	//   res       : sum bit
	//   carry_out : carry out
	module uint_bit_add
	(
		input lhs,
		input rhs,
		input carry,
		output res,
		output carry_out
	);
		// Half-sum of the two operand bits, shared by both outputs.
		wire half_sum = lhs ^ rhs;

		assign res = half_sum ^ carry;
		// Carry is produced when both operands are set, or propagated when
		// exactly one is set and a carry comes in. (Plain OR operator; the
		// previous text carried a backslash escape that is not valid here.)
		assign carry_out = (half_sum & carry) | (lhs & rhs);
	endmodule

	// WIDTH-bit ripple-carry adder built from uint_bit_add cells.
	//   lhs, rhs : operands
	//   res      : lhs + rhs modulo 2^WIDTH (the final carry is not exposed)
	module uint_add # (parameter WIDTH = 8)
	(
		input [(WIDTH - 1):0] lhs,
		input [(WIDTH - 1):0] rhs,
		output [(WIDTH - 1):0] res
	);
		// ripple[k] is the carry into bit k; ripple[WIDTH] is left unconnected.
		wire [WIDTH:0] ripple;
		assign ripple[0] = 0;

		genvar k;
		generate
			for (k = 0; k < WIDTH; k = k + 1) begin : adders
				uint_bit_add adder
				(
					.lhs(lhs[k]), .rhs(rhs[k]), .carry(ripple[k]),
					.res(res[k]), .carry_out(ripple[k + 1])
				);
			end
		endgenerate
	endmodule

	// Multiplies a (2*WIDTH)-bit vector by a single bit: every bit of lhs is
	// ANDed with rhs.
	//   lhs : (2*WIDTH)-bit operand
	//   rhs : one-bit multiplier
	//   res : lhs when rhs is 1, all zeros when rhs is 0
	module uint_bit_mul # (parameter WIDTH = 8)
	(
		input [(WIDTH * 2 - 1):0] lhs,
		input rhs,
		output [(WIDTH * 2 - 1):0] res
	);
		// Replicate the multiplier bit across the full width and mask lhs with it.
		assign res = lhs & {(WIDTH * 2){rhs}};
	endmodule

	// Unsigned WIDTH x WIDTH array multiplier (shift-and-add).
	//   lhs, rhs : WIDTH-bit operands
	//   res      : full (2*WIDTH)-bit product
	// Stage k adds (lhs << k), gated by rhs[k], to the running total.
	module uint_mul # (parameter WIDTH = 8)
	(
		input [(WIDTH - 1):0] lhs,
		input [(WIDTH - 1):0] rhs,
		output [(WIDTH * 2 - 1):0] res
	);
		wire [(WIDTH * 2 - 1):0] shifted [(WIDTH - 1):0];	// lhs << k, zero-extended
		wire [(WIDTH * 2 - 1):0] partial [(WIDTH - 1):0];	// shifted[k] gated by rhs[k]
		wire [(WIDTH * 2 - 1):0] acc [WIDTH:0];				// running total
		assign acc[0] = {(WIDTH * 2){1'b0}};

		genvar k;
		generate
			for (k = 0; k < WIDTH; k = k + 1) begin : multipliers
				assign shifted[k] = {{WIDTH{1'b0}}, lhs} << k;

				uint_bit_mul #(.WIDTH(WIDTH)) multiplier
				(
					.lhs(shifted[k]), .rhs(rhs[k]), .res(partial[k])
				);
				uint_add #(.WIDTH(WIDTH * 2)) adder
				(
					.lhs(partial[k]), .rhs(acc[k]), .res(acc[k + 1])
				);
			end
		endgenerate

		assign res = acc[WIDTH];
	endmodule

	// WIDTH-bit two-input multiplexer.
	//   in0 : value forwarded when sel is 0
	//   in1 : value forwarded when sel is 1
	//   sel : select input
	//   res : selected value
	module uint_mux_2 # (parameter WIDTH = 8)
	(
		input	[(WIDTH - 1):0] in0,
		input	[(WIDTH - 1):0] in1,
		input	sel,
		output	[(WIDTH - 1):0] res
	);
		// Written with the inverted select; the conditional operator is kept
		// so that an unknown select behaves as in a plain "sel ? in1 : in0".
		assign res = (~sel) ? in0 : in1;
	endmodule

	// One-bit comparison cell used by uint_comp.
	//   lhs, rhs : bits to compare
	//   equal    : 1 when lhs and rhs match
	//   res      : copy of rhs (when the bits differ, 1 therefore means lhs < rhs)
	module uint_bit_comp
	(
		input	lhs,
		input	rhs,
		output	equal,
		output	res
	);
		assign res = rhs;
		assign equal = lhs ~^ rhs;	// XNOR
	endmodule

	// WIDTH-bit unsigned comparator.
	//   lhs, rhs : values to compare
	//   equal    : 1 when every bit of lhs matches rhs
	//   res      : the rhs bit at the most significant position where the
	//              inputs differ (1 means lhs < rhs); when the inputs are
	//              equal it is rhs[0]
	module uint_comp # (parameter WIDTH = 8)
	(
		input	[(WIDTH - 1):0] lhs,
		input	[(WIDTH - 1):0] rhs,
		output	equal,
		output	res
	);
		wire [(WIDTH - 1):0] bit_eq;	// per-bit equality
		wire [(WIDTH - 1):0] eq_chain;	// bits k..0 all equal
		wire [(WIDTH - 1):0] bit_cmp;	// per-bit comparison result
		wire [(WIDTH - 1):0] cmp_chain;	// comparison decided by bits k..0

		genvar k;
		generate
			for (k = 0; k < WIDTH; k = k + 1) begin : comparators
				uint_bit_comp bit_comp
				(
					.lhs(lhs[k]), .rhs(rhs[k]),
					.equal(bit_eq[k]), .res(bit_cmp[k])
				);
				if (k != 0) begin
					// A higher bit overrides the lower-order verdict unless it matches.
					uint_mux_2 #(.WIDTH(1)) bit_mux
					(
						.in0(bit_cmp[k]), .in1(cmp_chain[k - 1]),
						.sel(bit_eq[k]), .res(cmp_chain[k])
					);
					assign eq_chain[k] = bit_eq[k] & eq_chain[k - 1];
				end
				else begin
					assign eq_chain[k] = bit_eq[k];
					assign cmp_chain[k] = bit_cmp[k];
				end
			end
		endgenerate

		assign equal = eq_chain[WIDTH - 1];
		assign res = cmp_chain[WIDTH - 1];
	endmodule

	// Right shift by the compile-time constant SHIFT.
	//   in      : value to shift
	//   logical : 1 fills vacated bits with 0; 0 fills them with in[WIDTH-1]
	//   res     : shifted value
	// SHIFT <= 0 passes the input through; SHIFT >= WIDTH yields only fill bits.
	module uint_sr_const # (parameter WIDTH = 8, parameter SHIFT = 1)
	(
		input	[(WIDTH - 1):0] in,
		input	logical,
		output	[(WIDTH - 1):0] res
	);
		// Bit shifted in from the left.
		wire fill = (~logical) & in[WIDTH - 1];

		generate
			if (SHIFT <= 0) begin
				assign res = in;
			end
			else if (SHIFT < WIDTH) begin
				assign res = {{SHIFT{fill}}, in[(WIDTH - 1):SHIFT]};
			end
			else begin
				assign res = {WIDTH{fill}};
			end
		endgenerate
	endmodule

	// Variable right shifter (barrel style). Stage k shifts by 2^k when
	// shift[k] is set, so the total shift equals the value of `shift`.
	//   in      : value to shift
	//   shift   : SHIFT-bit shift amount
	//   logical : forwarded to every uint_sr_const stage (1 = zero fill,
	//             0 = fill with the stage input's top bit)
	//   res     : shifted value
	module uint_sr # (parameter WIDTH = 8, parameter SHIFT = 3)
	(
		input	[(WIDTH - 1):0] in,
		input	[(SHIFT - 1):0] shift,
		input	logical,
		output	[(WIDTH - 1):0] res
	);
		wire [(WIDTH - 1):0] stage[SHIFT:0];	// stage[k]: value entering stage k
		wire [(WIDTH - 1):0] moved[SHIFT:0];	// moved[k]: stage[k] shifted by 2^k
		genvar k;

		assign stage[0] = in;
		assign res = stage[SHIFT];

		generate
			for (k = 0; k < SHIFT; k = k + 1) begin : shifters
				uint_sr_const #(.WIDTH(WIDTH), .SHIFT(1 << k)) shifter
				(
					.in(stage[k]), .logical(logical), .res(moved[k])
				);
				uint_mux_2 #(.WIDTH(WIDTH)) mux
				(
					.in0(stage[k]), .in1(moved[k]),
					.sel(shift[k]), .res(stage[k + 1])
				);
			end
		endgenerate
	endmodule

	// Left shift by the compile-time constant SHIFT, filling with zeros.
	//   in  : value to shift
	//   res : shifted value
	// SHIFT <= 0 passes the input through; SHIFT >= WIDTH yields all zeros.
	module uint_sl_const # (parameter WIDTH = 8, parameter SHIFT = 1)
	(
		input	[(WIDTH - 1):0] in,
		output	[(WIDTH - 1):0] res
	);
		generate
			if (SHIFT <= 0) begin
				assign res = in;
			end
			else if (SHIFT < WIDTH) begin
				// Keep the low WIDTH-SHIFT bits and append SHIFT zeros below them.
				assign res = {in[(WIDTH - SHIFT - 1):0], {SHIFT{1'b0}}};
			end
			else begin
				assign res = 0;
			end
		endgenerate
	endmodule

	// Variable left shifter (barrel style). Stage k shifts by 2^k when
	// shift[k] is set, so the total shift equals the value of `shift`.
	//   in    : value to shift
	//   shift : SHIFT-bit shift amount
	//   res   : shifted value (vacated bits are zero)
	module uint_sl # (parameter WIDTH = 8, parameter SHIFT = 3)
	(
		input	[(WIDTH - 1):0] in,
		input	[(SHIFT - 1):0] shift,
		output	[(WIDTH - 1):0] res
	);
		wire [(WIDTH - 1):0] stage[SHIFT:0];	// stage[k]: value entering stage k
		wire [(WIDTH - 1):0] moved[SHIFT:0];	// moved[k]: stage[k] shifted by 2^k
		genvar k;

		assign stage[0] = in;
		assign res = stage[SHIFT];

		generate
			for (k = 0; k < SHIFT; k = k + 1) begin : shifters
				uint_sl_const #(.WIDTH(WIDTH), .SHIFT(1 << k)) shifter
				(
					.in(stage[k]), .res(moved[k])
				);
				uint_mux_2 #(.WIDTH(WIDTH)) mux
				(
					.in0(stage[k]), .in1(moved[k]),
					.sel(shift[k]), .res(stage[k + 1])
				);
			end
		endgenerate
	endmodule

	// Two's-complement negation: res = (~in) + 1 modulo 2^WIDTH.
	//   in  : value to negate
	//   res : negated value
	module uint_neg # (parameter WIDTH = 8)
	(
		input	[(WIDTH - 1):0] in,
		output	[(WIDTH - 1):0] res
	);
		parameter [(WIDTH - 1):0] one = 1;

		// Bitwise complement of the input, fed to the incrementing adder.
		wire [(WIDTH - 1):0] inverted;
		assign inverted = ~in;

		uint_add #(.WIDTH(WIDTH)) adder (.lhs(inverted), .rhs(one), .res(res));
	endmodule