Skip to content

Instantly share code, notes, and snippets.

@paxbun
Last active July 7, 2021 17:58
Show Gist options
  • Save paxbun/98667e099b398be27de6bf4180051aa4 to your computer and use it in GitHub Desktop.
Save paxbun/98667e099b398be27de6bf4180051aa4 to your computer and use it in GitHub Desktop.
IEEE single-precision floating-point format addition/multiplication implementation (not handling NaN and INF)
// Unpacks a 32-bit IEEE-754 single-precision word into its fields.
//   in  : raw 32-bit word
//   sgn : sign bit (in[31])
//   exp : stored exponent widened to 9 bits (bit 8 is always 0); a stored
//         exponent of 0 is reported as 1, so zero/subnormal inputs share the
//         scale of the smallest normal exponent
//   man : 24-bit significand; bit 23 is the hidden leading bit, which is 1
//         only when the stored exponent field is non-zero
module float32_splitter
(
    input [31:0] in,
    output sgn,
    output [8:0] exp,
    output [23:0] man
);
    // High when any bit of the stored exponent field in[30:23] is set.
    wire exp_nonzero = |in[30:23];

    assign sgn = in[31];
    assign man = {exp_nonzero, in[22:0]};
    // The LSB of the exponent is forced to 1 when the stored exponent is zero.
    assign exp = {1'b0, in[30:24], in[23] | ~exp_nonzero};
endmodule
// Adds two biased 9-bit exponents and removes one bias of 127, so the result
// is again biased once. All arithmetic wraps modulo 2^9.
//   lhs, rhs : biased exponents
//   res      : lhs + rhs - 127 (9-bit, wrapping)
module float32_biased_exp_adder
(
    input [8:0] lhs,
    input [8:0] rhs,
    output [8:0] res
);
    // Two's-complement encoding of -127 in 9 bits.
    localparam [8:0] NEG_BIAS = 9'b110000001;

    // Sum of both operands; still carries the bias twice.
    wire [8:0] raw_sum;

    uint_add #(.WIDTH(9)) sum_stage
    (
        .lhs(lhs),
        .rhs(rhs),
        .res(raw_sum)
    );

    // Drop the surplus bias.
    uint_add #(.WIDTH(9)) rebias_stage
    (
        .lhs(raw_sum),
        .rhs(NEG_BIAS),
        .res(res)
    );
endmodule
// Computes lhs - rhs on 9-bit exponents (wrapping modulo 2^9). Subtracting two
// equally biased exponents cancels the bias, hence the "unbiased" result.
//   lhs, rhs : 9-bit exponents
//   res      : lhs - rhs
module float32_unbiased_exp_subtractor
(
    input [8:0] lhs,
    input [8:0] rhs,
    output [8:0] res
);
    // Two's-complement negation of rhs, built as (~rhs) + 1.
    wire [8:0] rhs_inverted;
    wire [8:0] rhs_twos_comp;

    assign rhs_inverted = ~rhs;

    uint_add #(.WIDTH(9)) complementer
    (
        .lhs(rhs_inverted),
        .rhs(9'd1),
        .res(rhs_twos_comp)
    );

    uint_add #(.WIDTH(9)) difference
    (
        .lhs(lhs),
        .rhs(rhs_twos_comp),
        .res(res)
    );
endmodule
// Single-precision floating-point multiplier (truncating, no rounding).
//   lhs, rhs : IEEE-754 binary32 operands
//   res      : IEEE-754 binary32 product
//
// Behaviour:
//   * NaN / infinity operands are NOT handled yet (result is forced to 0).
//   * A zero significand product yields a signed zero.
//   * An exponent that overflows yields a signed infinity.
//   * An exponent that underflows (rebiased exponent <= 0) yields a subnormal
//     obtained by shifting the significand right (bits shifted out are
//     truncated), instead of being mistaken for an overflow.
//
// NOTE(review): products involving a subnormal operand are not re-normalised
// when the resulting exponent is still in the normal range; that limitation
// of the original design is unchanged.
module float32_mul
(
    input [31:0] lhs,
    input [31:0] rhs,
    output reg [31:0] res
);
    wire sgn_lhs, sgn_rhs;
    wire [8:0] exp_lhs, exp_rhs;
    wire [8:0] exp_sum, exp_sum_plus_1;
    wire [23:0] man_lhs, man_rhs;
    wire [47:0] man_prod;

    float32_splitter lhs_splitter
    (
        .in(lhs),
        .sgn(sgn_lhs),
        .exp(exp_lhs),
        .man(man_lhs)
    );
    float32_splitter rhs_splitter
    (
        .in(rhs),
        .sgn(sgn_rhs),
        .exp(exp_rhs),
        .man(man_rhs)
    );

    // exp_sum = exp_lhs + exp_rhs - 127 (9-bit two's complement).
    float32_biased_exp_adder exp_adder
    (
        .lhs(exp_lhs),
        .rhs(exp_rhs),
        .res(exp_sum)
    );
    // Exponent to use when the significand product carries into bit 47.
    uint_add #(.WIDTH(9)) exp_adder_plus_1
    (
        .lhs(exp_sum),
        .rhs(9'd1),
        .res(exp_sum_plus_1)
    );
    uint_mul #(.WIDTH(24)) man_multiplier
    (
        .lhs(man_lhs),
        .rhs(man_rhs),
        .res(man_prod)
    );

    // Select exponent and 24-bit significand according to where the leading
    // bit of the 48-bit product is expected (bit 47 or bit 46).
    wire        prod_carry = man_prod[47];
    wire [8:0]  exp_sel    = prod_carry ? exp_sum_plus_1 : exp_sum;
    wire [23:0] sig_sel    = prod_carry ? man_prod[47:24] : man_prod[46:23];

    wire prod_zero = (man_prod == 48'd0);

    // For finite operands float32_splitter reports exponents in 1..254, so
    // exp_sel lies in [-125, 382]. In 9 bits the negative values occupy
    // 387..511 (bits 8 and 7 both set) while true overflows occupy 256..382
    // (bit 8 set, bit 7 clear), so the two cases can be told apart.
    wire exp_underflow = (exp_sel[8] & exp_sel[7]) | (exp_sel == 9'd0);
    wire exp_overflow  = exp_sel[8] | (exp_sel[7:0] == 8'hFF);

    // Subnormal result: the exponent field is pinned to 0 (scale 2^-126), so
    // the significand must be shifted right by (1 - exp_sel) positions.
    wire [8:0]  denorm_shift = 9'd1 - exp_sel;
    wire [23:0] sig_denorm   = sig_sel >> denorm_shift;

    always @(*) begin
        if (exp_lhs == 255 || exp_rhs == 255) begin
            // NaN and infinity
            // TODO
            res = 0;
        end
        else begin
            // finite
            res[31] = sgn_lhs ^ sgn_rhs;
            if (prod_zero) begin
                // at least one operand is zero: signed zero
                res[30:0] = 31'd0;
            end
            else if (exp_underflow) begin
                // gradual underflow (checked before overflow because negative
                // exponents also have bit 8 set)
                res[30:23] = 8'h00;
                res[22:0] = sig_denorm[22:0];
            end
            else if (exp_overflow) begin
                // result is infinity
                res[30:23] = 8'hFF;
                res[22:0] = 0;
            end
            else begin
                res[30:23] = exp_sel[7:0];
                res[22:0] = sig_sel[22:0];
            end
        end
    end
endmodule
// Orders two binary32 operands by their stored exponent field so that
// lhs_out carries the operand whose exponent is not smaller.
//   lhs, rhs         : raw 32-bit operands
//   lhs_out, rhs_out : the same two words, exchanged when the comparator
//                      reports the exponents as different and flags rhs
module float32_swap_by_exp
(
    input [31:0] lhs,
    input [31:0] rhs,
    output [31:0] lhs_out,
    output [31:0] rhs_out
);
    wire exps_match;
    wire rhs_exp_wins;
    wire do_swap;

    uint_comp #(.WIDTH(8)) exp_comparator
    (
        .lhs(lhs[30:23]),
        .rhs(rhs[30:23]),
        .equal(exps_match),
        .res(rhs_exp_wins)
    );

    // The comparator verdict is only meaningful when the exponents differ.
    assign do_swap = ~exps_match & rhs_exp_wins;

    assign lhs_out = do_swap ? rhs : lhs;
    assign rhs_out = do_swap ? lhs : rhs;
endmodule
// Priority encoder locating the most significant set bit of a 25-bit
// magnitude and translating it into normalisation controls.
//   in         : 25-bit unsigned magnitude
//   exp_offset : (index of highest set bit) - 23 as a 9-bit two's-complement
//                value; -24 when in is all zeros
//   left_shift : 24 - (index of highest set bit); 25 when in is all zeros
//
// The chain is built from bit 0 upwards: every stage either forwards the
// result of the lower stages or, if its own bit is set, overrides it.
//
// Fix: the generated chain now starts at bit 1. Stage 0 is the explicit
// mux1_zero / mux2_zero pair; the previous loop also generated a stage for
// i = 0, which read the non-existent element [-1] and drove element [0] a
// second time.
module float32_exp_bit_idx
(
    input [24:0] in,
    output [8:0] exp_offset,
    output [4:0] left_shift
);
    wire [8:0] mux_res_exp_offset[24:0];
    wire [4:0] mux_res_left_shift[24:0];
    genvar i;
    generate
        // Stage 0: in0 is the all-zero fallback, in1 the value for bit 0.
        uint_mux_2 #(.WIDTH(9)) mux1_zero (
            .in0(9'b111101000), // -24
            .in1(9'b111101001), // -23
            .sel(in[0]),
            .res(mux_res_exp_offset[0])
        );
        uint_mux_2 #(.WIDTH(5)) mux2_zero (
            .in0(5'b11001), // 25
            .in1(5'b11000), // 24
            .sel(in[0]),
            .res(mux_res_left_shift[0])
        );
        // Stages 1..24: a set bit i overrides whatever the lower bits chose.
        for (i = 1; i < 25; i = i + 1) begin : priority_chain
            localparam [8:0] i_minus_23 = i - 23;
            localparam [4:0] _24_minus_i = 24 - i;
            uint_mux_2 #(.WIDTH(9)) mux1 (
                .in0(mux_res_exp_offset[i - 1]), .in1(i_minus_23), .sel(in[i]),
                .res(mux_res_exp_offset[i])
            );
            uint_mux_2 #(.WIDTH(5)) mux2 (
                .in0(mux_res_left_shift[i - 1]), .in1(_24_minus_i), .sel(in[i]),
                .res(mux_res_left_shift[i])
            );
        end
    endgenerate
    assign exp_offset = mux_res_exp_offset[24];
    assign left_shift = mux_res_left_shift[24];
endmodule
// Single-precision floating-point adder (truncating, no rounding).
//   lhs, rhs : IEEE-754 binary32 operands
//   res      : IEEE-754 binary32 sum
//
// Data path:
//   1. order the operands so that the first one has the larger exponent
//   2. convert both significands to 26-bit two's complement
//   3. arithmetically shift the second one right by the exponent difference
//   4. add, then take the magnitude and remember the sign
//   5. normalise (normal result) or rescale to the subnormal exponent
//
// Fixes relative to the previous revision:
//   * The result was forced to zero whenever the 23 fraction bits were zero,
//     so every exact power of two (e.g. 1.0 + 1.0 or 1.0 + 0.0) came out as
//     0. Zero is now detected on the whole 25-bit magnitude.
//   * Subnormal results used the un-rescaled magnitude and missed the case
//     where the normalised exponent is exactly 0. The magnitude is now
//     shifted left by (larger exponent - 1), which references it to the
//     fixed subnormal scale.
//
// TODO: NaN / infinity operands and overflow to infinity are not handled.
module float32_add
(
    input [31:0] lhs,
    input [31:0] rhs,
    output [31:0] res
);
    // ---- 1. operand ordering -------------------------------------------
    wire [31:0] lhs_swapped, rhs_swapped;
    float32_swap_by_exp swapper
    (
        .lhs(lhs), .rhs(rhs),
        .lhs_out(lhs_swapped), .rhs_out(rhs_swapped)
    );

    wire sgn_lhs, sgn_rhs;
    wire [8:0] exp_lhs, exp_rhs;
    wire [23:0] man_lhs, man_rhs;
    float32_splitter lhs_splitter
    (
        .in(lhs_swapped),
        .sgn(sgn_lhs),
        .exp(exp_lhs),
        .man(man_lhs)
    );
    float32_splitter rhs_splitter
    (
        .in(rhs_swapped),
        .sgn(sgn_rhs),
        .exp(exp_rhs),
        .man(man_rhs)
    );

    // ---- 2. signed 26-bit significands ---------------------------------
    // Two guard bits on top: one for the carry of the addition, one for the
    // two's-complement sign.
    wire [25:0] man_lhs_ex, man_rhs_ex;
    assign man_lhs_ex[25:24] = 0;
    assign man_lhs_ex[23:0] = man_lhs;
    assign man_rhs_ex[25:24] = 0;
    assign man_rhs_ex[23:0] = man_rhs;

    wire [25:0] man_lhs_ex_neg, man_rhs_ex_neg;
    uint_neg #(.WIDTH(26)) man_lhs_ex_negator
    (
        .in(man_lhs_ex),
        .res(man_lhs_ex_neg)
    );
    uint_neg #(.WIDTH(26)) man_rhs_ex_negator
    (
        .in(man_rhs_ex),
        .res(man_rhs_ex_neg)
    );

    wire [25:0] man_lhs_fin, man_rhs_fin;
    uint_mux_2 #(.WIDTH(26)) man_lhs_mux
    (
        .in0(man_lhs_ex),
        .in1(man_lhs_ex_neg),
        .sel(sgn_lhs),
        .res(man_lhs_fin)
    );
    uint_mux_2 #(.WIDTH(26)) man_rhs_mux
    (
        .in0(man_rhs_ex),
        .in1(man_rhs_ex_neg),
        .sel(sgn_rhs),
        .res(man_rhs_fin)
    );

    // ---- 3. alignment ---------------------------------------------------
    wire [8:0] exp_diff;
    float32_unbiased_exp_subtractor exp_subtractor
    (
        .lhs(exp_lhs),
        .rhs(exp_rhs),
        .res(exp_diff)
    );
    wire [25:0] man_rhs_fin_shifted;
    // logical = 0 requests an arithmetic (sign-extending) shift.
    uint_sr #(.WIDTH(26), .SHIFT(9)) man_rhs_shifter
    (
        .in(man_rhs_fin),
        .shift(exp_diff),
        .logical(1'b0),
        .res(man_rhs_fin_shifted)
    );

    // ---- 4. signed addition and magnitude ------------------------------
    wire [25:0] man_sum, man_sum_neg, man_sum_fin;
    uint_add #(.WIDTH(26)) man_adder
    (
        .lhs(man_lhs_fin),
        .rhs(man_rhs_fin_shifted),
        .res(man_sum)
    );
    uint_neg #(.WIDTH(26)) man_sum_negator
    (
        .in(man_sum),
        .res(man_sum_neg)
    );
    uint_mux_2 #(.WIDTH(26)) man_sum_mux
    (
        .in0(man_sum),
        .in1(man_sum_neg),
        .sel(man_sum[25]),
        .res(man_sum_fin)
    );

    // ---- 5a. normal result: normalise magnitude and adjust exponent -----
    wire [8:0] exp_offset, exp_offset_added, exp_fin;
    wire [4:0] left_shift;
    float32_exp_bit_idx exp_bit_idx_calc
    (
        .in(man_sum_fin[24:0]),
        .exp_offset(exp_offset),
        .left_shift(left_shift)
    );
    uint_add #(.WIDTH(9)) exp_offset_adder
    (
        .lhs(exp_lhs),
        .rhs(exp_offset),
        .res(exp_offset_added)
    );
    wire [24:0] man_sum_shifted;
    wire [22:0] man_sum_shifted_fin;
    // Moves the leading one to bit 24; bits [23:1] are then the fraction.
    uint_sl #(.WIDTH(25), .SHIFT(5)) man_sum_shifter
    (
        .in(man_sum_fin[24:0]),
        .shift(left_shift),
        .res(man_sum_shifted)
    );

    // ---- 5b. subnormal / zero result -----------------------------------
    // The magnitude is scaled by the larger exponent exp_lhs. A subnormal is
    // scaled as if the exponent were 1, so the fraction is the magnitude
    // shifted left by (exp_lhs - 1). Only the low 5 bits of that amount are
    // used: whenever this path is selected for a non-zero sum the amount is
    // at most 22, and a zero sum stays zero for any shift.
    wire [8:0] exp_lhs_minus_1;
    uint_add #(.WIDTH(9)) exp_lhs_decrementer
    (
        .lhs(exp_lhs),
        .rhs(9'b111111111), // -1
        .res(exp_lhs_minus_1)
    );
    wire [24:0] man_sum_denorm;
    uint_sl #(.WIDTH(25), .SHIFT(5)) man_sum_denorm_shifter
    (
        .in(man_sum_fin[24:0]),
        .shift(exp_lhs_minus_1[4:0]),
        .res(man_sum_denorm)
    );

    // Exact zero is detected on the full magnitude, not just the fraction.
    wire sum_is_zero;
    assign sum_is_zero = ~(|man_sum_fin[24:0]);
    // Normalised exponent negative (bit 8) or exactly zero: not representable
    // as a normal number.
    wire exp_underflow;
    assign exp_underflow = exp_offset_added[8] | ~(|exp_offset_added[7:0]);
    wire use_subnormal;
    assign use_subnormal = sum_is_zero | exp_underflow;

    uint_mux_2 #(.WIDTH(23)) man_sum_shifted_mux
    (
        .in0(man_sum_shifted[23:1]),
        .in1(man_sum_denorm[22:0]),
        .sel(use_subnormal),
        .res(man_sum_shifted_fin)
    );
    uint_mux_2 #(.WIDTH(9)) exp_fin_mux
    (
        .in0(exp_offset_added),
        .in1(9'b0),
        .sel(use_subnormal),
        .res(exp_fin)
    );

    // TODO: handle NaN and infinity
    assign res[31] = man_sum[25];
    assign res[30:23] = exp_fin[7:0];
    assign res[22:0] = man_sum_shifted_fin;
endmodule
// Testbench: drives float32_mul and float32_add with a handful of operand
// pairs and prints the simulator's own shortreal result next to the result
// produced by the modules. Checking is visual only; nothing is asserted.
module tb_float32;
    reg [31:0] lhs, rhs;
    wire [31:0] prod, sum;

    float32_mul multiplier (.lhs(lhs), .rhs(rhs), .res(prod));
    float32_add adder (.lhs(lhs), .rhs(rhs), .res(sum));

    // Applies one operand pair, waits 5 time units for the combinational
    // logic to settle, prints reference and actual results, then waits
    // another 5 time units before returning.
    task run_case;
        input [31:0] a;
        input [31:0] b;
        begin
            lhs = a;
            rhs = b;
            #5;
            $display("desired: %f * %f = %f", $bitstoshortreal(lhs), $bitstoshortreal(rhs), $bitstoshortreal(lhs) * $bitstoshortreal(rhs));
            $display("actual: %f * %f = %f", $bitstoshortreal(lhs), $bitstoshortreal(rhs), $bitstoshortreal(prod));
            $display("desired: %f + %f = %f", $bitstoshortreal(lhs), $bitstoshortreal(rhs), $bitstoshortreal(lhs) + $bitstoshortreal(rhs));
            $display("actual: %f + %f = %f", $bitstoshortreal(lhs), $bitstoshortreal(rhs), $bitstoshortreal(sum));
            #5;
        end
    endtask

    initial begin
        run_case(32'h3FC94120, 32'h455AA314);
        run_case(32'hC6C23242, 32'h45B7591A);
        run_case(32'h48E07967, 32'h455AA314);
        run_case(32'h48E07967, 32'h0);          // second operand is zero
        run_case(32'h48E07967, 32'hC8E07967);   // operands of opposite sign
    end
endmodule
// One-bit full adder.
//   lhs, rhs  : addend bits
//   carry     : carry in
//   res       : sum bit
//   carry_out : carry out
module uint_bit_add
(
    input lhs,
    input rhs,
    input carry,
    output res,
    output carry_out
);
    // Classic propagate / generate decomposition.
    wire propagate = lhs ^ rhs;
    wire gen = lhs & rhs;

    assign res = propagate ^ carry;
    assign carry_out = (propagate & carry) | gen;
endmodule
// WIDTH-bit ripple-carry adder. The carry out of the top bit is discarded,
// so the sum wraps modulo 2^WIDTH.
//   lhs, rhs : addends
//   res      : lhs + rhs (truncated to WIDTH bits)
module uint_add # (parameter WIDTH = 8)
(
    input [(WIDTH - 1):0] lhs,
    input [(WIDTH - 1):0] rhs,
    output [(WIDTH - 1):0] res
);
    // ripple[k] is the carry entering bit k; ripple[WIDTH] is left unused.
    wire [WIDTH:0] ripple;
    assign ripple[0] = 1'b0;

    genvar k;
    generate
        for (k = 0; k < WIDTH; k = k + 1) begin : adders
            // Full-adder equations written out per bit.
            wire bit_xor;
            assign bit_xor = lhs[k] ^ rhs[k];
            assign res[k] = bit_xor ^ ripple[k];
            assign ripple[k + 1] = (bit_xor & ripple[k]) | (lhs[k] & rhs[k]);
        end
    endgenerate
endmodule
// Multiplies a (2*WIDTH)-bit vector by a single bit: the vector is passed
// through when rhs is 1 and cleared when rhs is 0.
//   lhs : (2*WIDTH)-bit operand
//   rhs : one-bit multiplier
//   res : lhs gated by rhs
module uint_bit_mul # (parameter WIDTH = 8)
(
    input [(WIDTH * 2 - 1):0] lhs,
    input rhs,
    output [(WIDTH * 2 - 1):0] res
);
    // Replicate the gating bit across the full width and AND it in.
    assign res = lhs & {(WIDTH * 2){rhs}};
endmodule
// Unsigned WIDTH x WIDTH array multiplier (shift-and-add).
//   lhs, rhs : WIDTH-bit factors
//   res      : full (2*WIDTH)-bit product
// For every bit k of rhs, lhs shifted left by k is gated with that bit and
// accumulated through a chain of (2*WIDTH)-bit adders.
module uint_mul # (parameter WIDTH = 8)
(
    input [(WIDTH - 1):0] lhs,
    input [(WIDTH - 1):0] rhs,
    output [(WIDTH * 2 - 1):0] res
);
    // acc[k] is the running sum of the first k partial products.
    wire [(WIDTH * 2 - 1):0] acc [WIDTH:0];
    assign acc[0] = {(WIDTH * 2){1'b0}};

    genvar k;
    generate
        for (k = 0; k < WIDTH; k = k + 1) begin : multipliers
            // lhs zero-extended to double width and moved to weight 2^k.
            wire [(WIDTH * 2 - 1):0] shifted;
            // Partial product: present only when rhs[k] is set.
            wire [(WIDTH * 2 - 1):0] partial;

            assign shifted = {{WIDTH{1'b0}}, lhs} << k;
            assign partial = shifted & {(WIDTH * 2){rhs[k]}};

            uint_add #(.WIDTH(WIDTH * 2)) adder
            (
                .lhs(partial),
                .rhs(acc[k]),
                .res(acc[k + 1])
            );
        end
    endgenerate

    assign res = acc[WIDTH];
endmodule
// Two-input, WIDTH-bit multiplexer.
//   in0 : value forwarded when sel is 0
//   in1 : value forwarded when sel is 1
//   sel : select line
//   res : selected value
module uint_mux_2 # (parameter WIDTH = 8)
(
    input [(WIDTH - 1):0] in0,
    input [(WIDTH - 1):0] in1,
    input sel,
    output [(WIDTH - 1):0] res
);
    genvar b;
    generate
        // One single-bit selector per output bit, all sharing sel.
        for (b = 0; b < WIDTH; b = b + 1) begin : lanes
            assign res[b] = sel ? in1[b] : in0[b];
        end
    endgenerate
endmodule
// Single-bit comparison cell.
//   lhs, rhs : bits to compare
//   equal    : 1 when both bits are identical
//   res      : copy of rhs; when the bits differ this tells that rhs holds
//              the 1 (i.e. rhs is the larger bit)
module uint_bit_comp
(
    input lhs,
    input rhs,
    output equal,
    output res
);
    assign res = rhs;
    assign equal = lhs ~^ rhs; // XNOR
endmodule
// Unsigned WIDTH-bit comparator.
//   lhs, rhs : values to compare
//   equal    : 1 when lhs and rhs are identical
//   res      : value of the rhs bit at the most significant position where
//              the operands differ, i.e. 1 when rhs > lhs; when the operands
//              are equal it simply carries rhs[0] and should be qualified
//              with ~equal
module uint_comp # (parameter WIDTH = 8)
(
    input [(WIDTH - 1):0] lhs,
    input [(WIDTH - 1):0] rhs,
    output equal,
    output res
);
    // Per-bit equality.
    wire [(WIDTH - 1):0] bit_same;
    // same_upto[k]: bits k..0 are all equal.
    wire [(WIDTH - 1):0] same_upto;
    // verdict[k]: comparison result considering bits k..0 only.
    wire [(WIDTH - 1):0] verdict;

    assign bit_same = ~(lhs ^ rhs);

    genvar k;
    generate
        for (k = 0; k < WIDTH; k = k + 1) begin : comparators
            if (k == 0) begin
                assign same_upto[k] = bit_same[k];
                assign verdict[k] = rhs[k];
            end
            else begin
                // A differing bit overrides everything decided by lower bits.
                assign verdict[k] = bit_same[k] ? verdict[k - 1] : rhs[k];
                assign same_upto[k] = bit_same[k] & same_upto[k - 1];
            end
        end
    endgenerate

    assign equal = same_upto[WIDTH - 1];
    assign res = verdict[WIDTH - 1];
endmodule
// Right shift by a compile-time constant.
//   WIDTH   : data width
//   SHIFT   : number of positions; <= 0 means pass-through, >= WIDTH shifts
//             every data bit out
//   in      : value to shift
//   logical : 1 fills vacated bits with 0 (logical shift); 0 fills them with
//             the top bit of in (arithmetic shift)
//   res     : shifted value
module uint_sr_const # (parameter WIDTH = 8, parameter SHIFT = 1)
(
    input [(WIDTH - 1):0] in,
    input logical,
    output [(WIDTH - 1):0] res
);
    // Bit shifted in from the left.
    wire fill;
    assign fill = (~logical) & in[WIDTH - 1];

    generate
        if (SHIFT <= 0) begin : no_shift
            assign res = in;
        end
        else if (SHIFT < WIDTH) begin : partial_shift
            assign res = {{SHIFT{fill}}, in[(WIDTH - 1):SHIFT]};
        end
        else begin : full_shift
            assign res = {WIDTH{fill}};
        end
    endgenerate
endmodule
// Variable right shifter (barrel shifter).
//   WIDTH   : data width
//   SHIFT   : width of the shift-amount input
//   in      : value to shift
//   shift   : number of positions to shift right
//   logical : forwarded to every constant shifter (1 = zero fill,
//             0 = fill with the top bit)
//   res     : shifted value
// Stage k shifts by 2^k positions and is taken only when shift[k] is set.
module uint_sr # (parameter WIDTH = 8, parameter SHIFT = 3)
(
    input [(WIDTH - 1):0] in,
    input [(SHIFT - 1):0] shift,
    input logical,
    output [(WIDTH - 1):0] res
);
    // stage[k] is the data entering stage k; stage[SHIFT] is the result.
    wire [(WIDTH - 1):0] stage [SHIFT:0];
    assign stage[0] = in;

    genvar k;
    generate
        for (k = 0; k < SHIFT; k = k + 1) begin : shifters
            // stage[k] moved right by 2^k.
            wire [(WIDTH - 1):0] moved;

            uint_sr_const #(.WIDTH(WIDTH), .SHIFT(1 << k)) shifter
            (
                .in(stage[k]),
                .logical(logical),
                .res(moved)
            );

            assign stage[k + 1] = shift[k] ? moved : stage[k];
        end
    endgenerate

    assign res = stage[SHIFT];
endmodule
// Left shift by a compile-time constant, filling with zeros.
//   WIDTH : data width
//   SHIFT : number of positions; <= 0 means pass-through, >= WIDTH clears
//           the output
//   in    : value to shift
//   res   : shifted value
module uint_sl_const # (parameter WIDTH = 8, parameter SHIFT = 1)
(
    input [(WIDTH - 1):0] in,
    output [(WIDTH - 1):0] res
);
    generate
        if (SHIFT <= 0) begin : no_shift
            assign res = in;
        end
        else if (SHIFT < WIDTH) begin : partial_shift
            // Keep the low (WIDTH - SHIFT) bits and append SHIFT zeros.
            assign res = {in[(WIDTH - SHIFT - 1):0], {SHIFT{1'b0}}};
        end
        else begin : full_shift
            assign res = {WIDTH{1'b0}};
        end
    endgenerate
endmodule
// Variable left shifter (barrel shifter), zero filling.
//   WIDTH : data width
//   SHIFT : width of the shift-amount input
//   in    : value to shift
//   shift : number of positions to shift left
//   res   : shifted value
// Stage k shifts by 2^k positions and is taken only when shift[k] is set.
module uint_sl # (parameter WIDTH = 8, parameter SHIFT = 3)
(
    input [(WIDTH - 1):0] in,
    input [(SHIFT - 1):0] shift,
    output [(WIDTH - 1):0] res
);
    // stage[k] is the data entering stage k; stage[SHIFT] is the result.
    wire [(WIDTH - 1):0] stage [SHIFT:0];
    assign stage[0] = in;

    genvar k;
    generate
        for (k = 0; k < SHIFT; k = k + 1) begin : shifters
            // stage[k] moved left by 2^k.
            wire [(WIDTH - 1):0] moved;

            uint_sl_const #(.WIDTH(WIDTH), .SHIFT(1 << k)) shifter
            (
                .in(stage[k]),
                .res(moved)
            );

            assign stage[k + 1] = shift[k] ? moved : stage[k];
        end
    endgenerate

    assign res = stage[SHIFT];
endmodule
// Two's-complement negation: res = (~in) + 1, wrapping modulo 2^WIDTH.
//   in  : value to negate
//   res : -in
module uint_neg # (parameter WIDTH = 8)
(
    input [(WIDTH - 1):0] in,
    output [(WIDTH - 1):0] res
);
    // The constant 1 at the operand width.
    localparam [(WIDTH - 1):0] ONE = 1;

    // One's complement of the input.
    wire [(WIDTH - 1):0] flipped;
    assign flipped = ~in;

    uint_add #(.WIDTH(WIDTH)) adder
    (
        .lhs(flipped),
        .rhs(ONE),
        .res(res)
    );
endmodule
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment