Skip to content

Instantly share code, notes, and snippets.

@ZipCPU
Created February 22, 2017 15:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ZipCPU/dcb650ce5ff2d31fffb9b2c1f6e319ab to your computer and use it in GitHub Desktop.
Save ZipCPU/dcb650ce5ff2d31fffb9b2c1f6e319ab to your computer and use it in GitHub Desktop.
/////////////////////////////////////////////////////////////////////////////
//
// Filename: mpygen.cpp
//
// Project: A simple, better, multiply generator
//
// Purpose: This is the product of studying the signed, two's complement,
// multiply operation presented on wikipedia,
//
// http://en.wikipedia.org/wiki/Binary_multiplier
//
// Basically, this file supports the routine, bestmpy, which
// builds a multiply for any pair of bitwidths. While I
// didn't really want to build this in a coregen fashion,
// such as you have here, the multiply operator kept changing
// depending upon the bitwidth. I can handle such changes in
// C++, so ... thus I have computer generated multiply code.
// As a special benefit, the number of clock cycles that
// this routine takes is given by ceiling of the base two
// logarithm of the number of bits in the greater operand,
// plus one more. Thus a 16x20 multiply requires 6 clocks,
// whereas a 64x96 bit multiply (if you could build it), would
// require 8 clocks. Not bad, huh? Oh, and the memory
// requirements get smaller at each stage, as opposed to the
// shiftadd multiply where the requirements were constant on
// a per stage level, and often many more than I needed.
//
// The basic operation is to create a "tableau" of words to
// add together. This is my term for the result of all of the
// one bit multiplies (i_a[x] * i_b), and often written out
// between two horizontal lines while doing long division.
// This "tableau" is then added together to create the result.
// Since so many lines need to be added together, we add two
// lines at a time, and leave no line resting. Therefore,
// every even line gets added to an odd line at every clock
// to create a new tableau that is roughly half the size of
// the previous. The operation then repeats and so forth.
//
// I'm not sure I can explain the signed arithmetic extensions,
// but that's what adds the extra 1's into the tableau and what
// ends up complementing various bits along the way.
//
// Creator: Dan Gisselquist, Ph.D.
// Gisselquist Tecnology, LLC
//
///////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2015, Gisselquist Technology, LLC
//
// This program is free software (firmware): you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program. (It's in the $(ROOT)/doc directory, run make with no
// target there if the PDF file isn't present.) If not, see
// <http://www.gnu.org/licenses/> for a copy.
//
// License: GPL, v3, as defined and found on www.gnu.org,
// http://www.gnu.org/licenses/gpl.html
//
//
///////////////////////////////////////////////////////////////////////////
//
const char cpyleft[] =
"///////////////////////////////////////////////////////////////////////////\n"
"//\n"
"// Copyright (C) 2015, Gisselquist Technology, LLC\n"
"//\n"
"// This program is free software (firmware): you can redistribute it and/or\n"
"// modify it under the terms of the GNU General Public License as published\n"
"// by the Free Software Foundation, either version 3 of the License, or (at\n"
"// your option) any later version.\n"
"//\n"
"// This program is distributed in the hope that it will be useful, but WITHOUT\n"
"// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or\n"
"// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License\n"
"// for more details.\n"
"//\n"
"// You should have received a copy of the GNU General Public License along\n"
"// with this program. If not, see <http://www.gnu.org/licenses/> for a\n"
"// copy.\n"
"//\n"
"// License: GPL, v3, as defined and found on www.gnu.org,\n"
"// http://www.gnu.org/licenses/gpl.html\n"
"//\n"
"//\n"
"///////////////////////////////////////////////////////////////////////////\n";
#include <stdio.h>
#include <stdlib.h>
const char prjname[] = "A simple, better, multiply generator";
const char creator[] = "// Creator: Dan Gisselquist, Ph.D.\n"
"// Gisselquist Tecnology, LLC\n";
void buildmpy(FILE *fp, char *name, int na, int nb, bool aux) {
int row, clock, nbits, nrows, nzros, maxbits = na+nb;
fprintf(fp,
"///////////////////////////////////////////////////////////////////////////\n"
"//\n"
"// Filename: %s.v\n"
"// \n"
"// Project: %s\n"
"//\n"
"// Purpose:\tThis verilog file multiplies two numbers together, without\n"
"// \t\tusing any hardware acceleration. The file itself is\n"
"// \t\tcomputer generated, so please (for your sake) don\'t\n"
"// \t\tmake any edits to the file lest you regenerate it and\n"
"// \t\tthey be lost.\n"
"//\n"
"//\n%s"
"//\n",
name, prjname, creator);
fprintf(fp, "%s", cpyleft);
if (nb < na) {
fprintf(fp, "module %s(i_clk%s, i_ce, i_b, i_a%s, o_p%s);\n",
name, (aux)?", i_rst":"",
(aux)?", i_aux":"", (aux)?", o_aux":"");
fprintf(fp,
"\tparameter\tNA=%d, NB=%d;\n"
"\tinput\t\t\t\t\ti_clk%s, i_ce;\n"
"\tinput\t\tsigned\t[(NB-1):0]\ti_b;\n"
"\tinput\t\tsigned\t[(NA-1):0]\ti_a;\n", nb, na,
(aux)?", i_rst":"");
int tmp = na; na = nb; nb = tmp;
} else {
fprintf(fp, "module %s(i_clk%s, i_ce, i_a, i_b%s, o_p%s);\n",
name, (aux)?", i_rst":"",
(aux)?", i_aux":"", (aux)?", o_aux":"");
fprintf(fp,
"\tparameter\tNA=%d, NB=%d;\n"
"\tinput\t\t\t\t\ti_clk%s, i_ce;\n"
"\tinput\t\tsigned\t[(NA-1):0]\ti_a;\n"
"\tinput\t\tsigned\t[(NB-1):0]\ti_b;\n", na, nb,
(aux)?", i_rst":"");
} if (aux) fprintf(fp, "\tinput\t\t\t\t\ti_aux;\n");
fprintf(fp, "\toutput\treg\tsigned\t[(NA+NB-1):0]\to_p;\n");
if (aux) fprintf(fp, "\toutput\treg\t\t\t\to_aux;\n");
fprintf(fp, "\n");
// Build the first tableau row
// There are Na elements, each of Nb length
clock = 0;
fprintf(fp, "\t// Clock zero: build our Tableau only.\n"
"\t// There will be one row for every bit in i_a, and each\n"
"\t// row will contain one more bit than i_b, to allow\n"
"\t// for signed arithmetic manipulation.\n"
"\t//\n"
"\t// Perhaps calling this \'clock zero\' is a misnomer. The term\n"
"\t// is left over from an earlier version where this section was\n"
"\t// truly clocked. Now, we leave this as unclocked conditional\n"
"\t// logic, since doing the first addition in logic fits within\n"
"\t// LUTs without needing the additional pair of flip flops for \n"
"\t// every row that would otherwise be needed.\n\t//\n");
for(row=0; row<nb; row++)
fprintf(fp, "\twire\t[NB:0]\tS_%d_%02d;\n", clock, row);
if (aux) fprintf(fp, "\twire\tA_%d;\n", clock);
fprintf(fp, "\n");
fprintf(fp, "\tassign\tS_%d_%02d = { 1\'b1, {(i_a[%d])?(~i_b[NB-1]):1\'b1},\n\t\t\t\t\t(i_a[%d])?i_b[NB-2:0]:{(NB-1){1\'b0}} };\n", clock, 0, 0, 0);
for(row=1; row<na-1; row++)
fprintf(fp, "\tassign\tS_%d_%02d = { 1\'b0, ((i_a[%d])?(~i_b[(NB-1)]):1\'b1),\n\t\t\t\t\t((i_a[%d]) ? i_b[(NB-2):0] : {(NB-1){1\'b0}}) };\n", clock, row, row, row);
for(row=na-1; row<nb-1; row++)
fprintf(fp, "\tassign\tS_%d_%02d = { 1\'b0, ((i_a[%d])?(~i_b[(NB-1)]):1\'b1),\n\t\t\t\t\t((i_a[%d]) ? i_b[(NB-2):0] : {(NB-1){1\'b0}}) };\n", clock, row, na-1, na-1);
fprintf(fp, "\tassign\tS_%d_%02d = { 1\'b1, ((i_a[%d])?i_b[NB-1]:1\'b0),\n\t\t\t\t\t((i_a[%d])?(~i_b[(NB-2):0]):{(NB-1){1\'b1}}) };\n", clock, row, na-1, na-1);
if (aux)
fprintf(fp, "\tassign\tA_%d = i_aux;\n", clock);
nrows = nb; nbits = nb+1, nzros=1;
while(nrows > 1) {
fprintf(fp, "\n\t//\n\t// Round #%d, clock = %d, nz = %d, nbits = %d, nrows = %d\n",
clock, clock, nzros, nbits, nrows);
clock++; fprintf(fp, "\n");
for(row=0; row<(nrows+1)/2; row++) {
fprintf(fp, "\treg\t[(%d-1):0]\tS_%d_%02d;\n",
((nbits+1+nzros)>maxbits)?maxbits
:(nbits+1+nzros), clock, row);
}
if (aux) fprintf(fp, "\treg\tA_%d;\n", clock);
fprintf(fp, "\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_ce)\n"
"\t\tbegin\n");
for(row=0; row<nrows/2; row++) {
fprintf(fp, "\t\t\tS_%d_%02d <= { ", clock, row);
if (maxbits-nbits>0) {
if (nzros+nbits+1 > maxbits)
fprintf(fp, "%d\'b0, ", maxbits-nbits);
else
fprintf(fp, "%d\'b0, ", nzros);
fprintf(fp, "S_%d_%02d } + { S_%d_%02d",
clock-1, 2*row, clock-1, 2*row+1);
} else {
fprintf(fp, "S_%d_%02d[%d:0] } + { S_%d_%02d",
clock-1, 2*row,
maxbits-1,
clock-1, 2*row+1);
}
if (nzros+nbits+1 > maxbits)
fprintf(fp, "[%d:0]", maxbits-1-nzros);
fprintf(fp, ", %d\'b0 };\n", nzros);
}
if (nrows&1) {
fprintf(fp, "\t\t\tS_%d_%02d <= { ",
clock, row);
if (nzros+nbits+1 > maxbits)
fprintf(fp, "%d\'b0, ", maxbits-nbits);
else
fprintf(fp, "%d\'b0, ", nzros+1);
fprintf(fp, "S_%d_%02d };\n", clock-1, nrows-1);
}
fprintf(fp, "\t\tend\n");
if (aux)
fprintf(fp, "\talways @(posedge i_clk)\n\t\tif (i_rst)\n"
"\t\t\tA_%d <= 1'b0;\n"
"\t\telse if (i_ce)\n"
"\t\t\tA_%d <= A_%d;\n", clock, clock, clock-1);
nrows = (nrows+1)/2;
nbits+=1+nzros; nzros<<= 1;
}
// The full multiply is complete, just clock our outputs
// to values we've already calculated.
if (true) { // Register the results, requiring an extra clock
fprintf(fp, "\n\talways @(posedge i_clk)\n\t\tif (i_ce)\n");
fprintf(fp, "\t\t\to_p <= S_%d_00[(NA+NB-1):0];\n", clock);
if (aux) {
fprintf(fp, "\n"
"\talways @(posedge i_clk)\n"
"\t\tif (i_rst)\n"
"\t\t\to_aux <= 1\'b0;\n"
"\t\telse if (i_ce)\n"
"\t\t\to_aux <= A_%d;\n", clock);
}
} else { // Just do a wire select to capture our results
fprintf(fp, "\n\tassign\to_p = S_%d_00[(NA+NB-1):0];\n", clock);
if (aux) fprintf(fp, "\tassign\to_aux = A_%d;\n", clock);
}
fprintf(fp, "\nendmodule\n");
}
void buildmpy(int Na, int Nb, bool use_aux) {
FILE *fp;
char fname[256];
sprintf(fname, "sgnmpy_%dx%d.v", Na, Nb);
fp = fopen(fname, "w");
if (!fp) {
fprintf(stderr, "Could not open %s for writing\n", fname);
perror("O/S Err:");
exit(-1);
}
sprintf(fname, "sgnmpy_%dx%d", Na, Nb);
buildmpy(fp, fname, Na, Nb, use_aux);
fclose(fp);
}
int main(int argc, char **argv) {
bool use_aux = true;
buildmpy(atoi(argv[1]), atoi(argv[2]), use_aux);
return(0);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment