Created
February 22, 2017 15:58
-
-
Save ZipCPU/dcb650ce5ff2d31fffb9b2c1f6e319ab to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
///////////////////////////////////////////////////////////////////////////// | |
// | |
// Filename: mpygen.cpp | |
// | |
// Project: A simple, better, multiply generator | |
// | |
// Purpose: This is the product of studying the signed, two's complement, | |
// multiply operation presented on wikipedia, | |
// | |
// http://en.wikipedia.org/wiki/Binary_multiplier | |
// | |
// Basically, this file supports the routine, bestmpy, which | |
// builds a multiply for any pair of bitwidths. While I | |
// didn't really want to build this in a coregen fashion, | |
// such as you have here, the multiply operator kept changing | |
// depending upon the bitwidth. I can handle such changes in | |
// C++, so ... thus I have computer generated multiply code. | |
// As a special benefit, the number of clock cycles that | |
// this routine takes is given by ceiling of the base two | |
// logarithm of the number of bits in the greater operand, | |
// plus one more. Thus a 16x20 multiply requires 6 clocks, | |
// whereas a 64x96 bit multiply (if you could build it), would | |
// require 8 clocks. Not bad, huh? Oh, and the memory | |
// requirements get smaller at each stage, as opposed to the | |
// shiftadd multiply where the requirements were constant on | |
// a per stage level, and often many more than I needed. | |
// | |
// The basic operation is to create a "tableau" of words to | |
// add together. This is my term for the result of all of the | |
// one bit multiplies (i_a[x] * i_b), and often written out | |
// between two horizontal lines while doing long division. | |
// This "tableau" is then added together to create the result. | |
// Since so many lines need to be added together, we add two | |
// lines at a time, and leave no line resting. Therefore, | |
// every even line gets added to an odd line at every clock | |
// to create a new tableau that is roughly half the size of | |
// the previous. The operation then repeats and so forth. | |
// | |
// I'm not sure I can explain the signed arithmetic extensions, | |
// but that's what adds the extra 1's into the tableau and what | |
// ends up complementing various bits along the way. | |
// | |
// Creator: Dan Gisselquist, Ph.D. | |
// Gisselquist Tecnology, LLC | |
// | |
/////////////////////////////////////////////////////////////////////////// | |
// | |
// Copyright (C) 2015, Gisselquist Technology, LLC | |
// | |
// This program is free software (firmware): you can redistribute it and/or | |
// modify it under the terms of the GNU General Public License as published | |
// by the Free Software Foundation, either version 3 of the License, or (at | |
// your option) any later version. | |
// | |
// This program is distributed in the hope that it will be useful, but WITHOUT | |
// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or | |
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
// for more details. | |
// | |
// You should have received a copy of the GNU General Public License along | |
// with this program. (It's in the $(ROOT)/doc directory, run make with no | |
// target there if the PDF file isn't present.) If not, see | |
// <http://www.gnu.org/licenses/> for a copy. | |
// | |
// License: GPL, v3, as defined and found on www.gnu.org, | |
// http://www.gnu.org/licenses/gpl.html | |
// | |
// | |
/////////////////////////////////////////////////////////////////////////// | |
// | |
const char cpyleft[] = | |
"///////////////////////////////////////////////////////////////////////////\n" | |
"//\n" | |
"// Copyright (C) 2015, Gisselquist Technology, LLC\n" | |
"//\n" | |
"// This program is free software (firmware): you can redistribute it and/or\n" | |
"// modify it under the terms of the GNU General Public License as published\n" | |
"// by the Free Software Foundation, either version 3 of the License, or (at\n" | |
"// your option) any later version.\n" | |
"//\n" | |
"// This program is distributed in the hope that it will be useful, but WITHOUT\n" | |
"// ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or\n" | |
"// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License\n" | |
"// for more details.\n" | |
"//\n" | |
"// You should have received a copy of the GNU General Public License along\n" | |
"// with this program. If not, see <http://www.gnu.org/licenses/> for a\n" | |
"// copy.\n" | |
"//\n" | |
"// License: GPL, v3, as defined and found on www.gnu.org,\n" | |
"// http://www.gnu.org/licenses/gpl.html\n" | |
"//\n" | |
"//\n" | |
"///////////////////////////////////////////////////////////////////////////\n"; | |
#include <stdio.h> | |
#include <stdlib.h> | |
const char prjname[] = "A simple, better, multiply generator"; | |
const char creator[] = "// Creator: Dan Gisselquist, Ph.D.\n" | |
"// Gisselquist Tecnology, LLC\n"; | |
void buildmpy(FILE *fp, char *name, int na, int nb, bool aux) { | |
int row, clock, nbits, nrows, nzros, maxbits = na+nb; | |
fprintf(fp, | |
"///////////////////////////////////////////////////////////////////////////\n" | |
"//\n" | |
"// Filename: %s.v\n" | |
"// \n" | |
"// Project: %s\n" | |
"//\n" | |
"// Purpose:\tThis verilog file multiplies two numbers together, without\n" | |
"// \t\tusing any hardware acceleration. The file itself is\n" | |
"// \t\tcomputer generated, so please (for your sake) don\'t\n" | |
"// \t\tmake any edits to the file lest you regenerate it and\n" | |
"// \t\tthey be lost.\n" | |
"//\n" | |
"//\n%s" | |
"//\n", | |
name, prjname, creator); | |
fprintf(fp, "%s", cpyleft); | |
if (nb < na) { | |
fprintf(fp, "module %s(i_clk%s, i_ce, i_b, i_a%s, o_p%s);\n", | |
name, (aux)?", i_rst":"", | |
(aux)?", i_aux":"", (aux)?", o_aux":""); | |
fprintf(fp, | |
"\tparameter\tNA=%d, NB=%d;\n" | |
"\tinput\t\t\t\t\ti_clk%s, i_ce;\n" | |
"\tinput\t\tsigned\t[(NB-1):0]\ti_b;\n" | |
"\tinput\t\tsigned\t[(NA-1):0]\ti_a;\n", nb, na, | |
(aux)?", i_rst":""); | |
int tmp = na; na = nb; nb = tmp; | |
} else { | |
fprintf(fp, "module %s(i_clk%s, i_ce, i_a, i_b%s, o_p%s);\n", | |
name, (aux)?", i_rst":"", | |
(aux)?", i_aux":"", (aux)?", o_aux":""); | |
fprintf(fp, | |
"\tparameter\tNA=%d, NB=%d;\n" | |
"\tinput\t\t\t\t\ti_clk%s, i_ce;\n" | |
"\tinput\t\tsigned\t[(NA-1):0]\ti_a;\n" | |
"\tinput\t\tsigned\t[(NB-1):0]\ti_b;\n", na, nb, | |
(aux)?", i_rst":""); | |
} if (aux) fprintf(fp, "\tinput\t\t\t\t\ti_aux;\n"); | |
fprintf(fp, "\toutput\treg\tsigned\t[(NA+NB-1):0]\to_p;\n"); | |
if (aux) fprintf(fp, "\toutput\treg\t\t\t\to_aux;\n"); | |
fprintf(fp, "\n"); | |
// Build the first tableau row | |
// There are Na elements, each of Nb length | |
clock = 0; | |
fprintf(fp, "\t// Clock zero: build our Tableau only.\n" | |
"\t// There will be one row for every bit in i_a, and each\n" | |
"\t// row will contain one more bit than i_b, to allow\n" | |
"\t// for signed arithmetic manipulation.\n" | |
"\t//\n" | |
"\t// Perhaps calling this \'clock zero\' is a misnomer. The term\n" | |
"\t// is left over from an earlier version where this section was\n" | |
"\t// truly clocked. Now, we leave this as unclocked conditional\n" | |
"\t// logic, since doing the first addition in logic fits within\n" | |
"\t// LUTs without needing the additional pair of flip flops for \n" | |
"\t// every row that would otherwise be needed.\n\t//\n"); | |
for(row=0; row<nb; row++) | |
fprintf(fp, "\twire\t[NB:0]\tS_%d_%02d;\n", clock, row); | |
if (aux) fprintf(fp, "\twire\tA_%d;\n", clock); | |
fprintf(fp, "\n"); | |
fprintf(fp, "\tassign\tS_%d_%02d = { 1\'b1, {(i_a[%d])?(~i_b[NB-1]):1\'b1},\n\t\t\t\t\t(i_a[%d])?i_b[NB-2:0]:{(NB-1){1\'b0}} };\n", clock, 0, 0, 0); | |
for(row=1; row<na-1; row++) | |
fprintf(fp, "\tassign\tS_%d_%02d = { 1\'b0, ((i_a[%d])?(~i_b[(NB-1)]):1\'b1),\n\t\t\t\t\t((i_a[%d]) ? i_b[(NB-2):0] : {(NB-1){1\'b0}}) };\n", clock, row, row, row); | |
for(row=na-1; row<nb-1; row++) | |
fprintf(fp, "\tassign\tS_%d_%02d = { 1\'b0, ((i_a[%d])?(~i_b[(NB-1)]):1\'b1),\n\t\t\t\t\t((i_a[%d]) ? i_b[(NB-2):0] : {(NB-1){1\'b0}}) };\n", clock, row, na-1, na-1); | |
fprintf(fp, "\tassign\tS_%d_%02d = { 1\'b1, ((i_a[%d])?i_b[NB-1]:1\'b0),\n\t\t\t\t\t((i_a[%d])?(~i_b[(NB-2):0]):{(NB-1){1\'b1}}) };\n", clock, row, na-1, na-1); | |
if (aux) | |
fprintf(fp, "\tassign\tA_%d = i_aux;\n", clock); | |
nrows = nb; nbits = nb+1, nzros=1; | |
while(nrows > 1) { | |
fprintf(fp, "\n\t//\n\t// Round #%d, clock = %d, nz = %d, nbits = %d, nrows = %d\n", | |
clock, clock, nzros, nbits, nrows); | |
clock++; fprintf(fp, "\n"); | |
for(row=0; row<(nrows+1)/2; row++) { | |
fprintf(fp, "\treg\t[(%d-1):0]\tS_%d_%02d;\n", | |
((nbits+1+nzros)>maxbits)?maxbits | |
:(nbits+1+nzros), clock, row); | |
} | |
if (aux) fprintf(fp, "\treg\tA_%d;\n", clock); | |
fprintf(fp, "\n" | |
"\talways @(posedge i_clk)\n" | |
"\t\tif (i_ce)\n" | |
"\t\tbegin\n"); | |
for(row=0; row<nrows/2; row++) { | |
fprintf(fp, "\t\t\tS_%d_%02d <= { ", clock, row); | |
if (maxbits-nbits>0) { | |
if (nzros+nbits+1 > maxbits) | |
fprintf(fp, "%d\'b0, ", maxbits-nbits); | |
else | |
fprintf(fp, "%d\'b0, ", nzros); | |
fprintf(fp, "S_%d_%02d } + { S_%d_%02d", | |
clock-1, 2*row, clock-1, 2*row+1); | |
} else { | |
fprintf(fp, "S_%d_%02d[%d:0] } + { S_%d_%02d", | |
clock-1, 2*row, | |
maxbits-1, | |
clock-1, 2*row+1); | |
} | |
if (nzros+nbits+1 > maxbits) | |
fprintf(fp, "[%d:0]", maxbits-1-nzros); | |
fprintf(fp, ", %d\'b0 };\n", nzros); | |
} | |
if (nrows&1) { | |
fprintf(fp, "\t\t\tS_%d_%02d <= { ", | |
clock, row); | |
if (nzros+nbits+1 > maxbits) | |
fprintf(fp, "%d\'b0, ", maxbits-nbits); | |
else | |
fprintf(fp, "%d\'b0, ", nzros+1); | |
fprintf(fp, "S_%d_%02d };\n", clock-1, nrows-1); | |
} | |
fprintf(fp, "\t\tend\n"); | |
if (aux) | |
fprintf(fp, "\talways @(posedge i_clk)\n\t\tif (i_rst)\n" | |
"\t\t\tA_%d <= 1'b0;\n" | |
"\t\telse if (i_ce)\n" | |
"\t\t\tA_%d <= A_%d;\n", clock, clock, clock-1); | |
nrows = (nrows+1)/2; | |
nbits+=1+nzros; nzros<<= 1; | |
} | |
// The full multiply is complete, just clock our outputs | |
// to values we've already calculated. | |
if (true) { // Register the results, requiring an extra clock | |
fprintf(fp, "\n\talways @(posedge i_clk)\n\t\tif (i_ce)\n"); | |
fprintf(fp, "\t\t\to_p <= S_%d_00[(NA+NB-1):0];\n", clock); | |
if (aux) { | |
fprintf(fp, "\n" | |
"\talways @(posedge i_clk)\n" | |
"\t\tif (i_rst)\n" | |
"\t\t\to_aux <= 1\'b0;\n" | |
"\t\telse if (i_ce)\n" | |
"\t\t\to_aux <= A_%d;\n", clock); | |
} | |
} else { // Just do a wire select to capture our results | |
fprintf(fp, "\n\tassign\to_p = S_%d_00[(NA+NB-1):0];\n", clock); | |
if (aux) fprintf(fp, "\tassign\to_aux = A_%d;\n", clock); | |
} | |
fprintf(fp, "\nendmodule\n"); | |
} | |
void buildmpy(int Na, int Nb, bool use_aux) { | |
FILE *fp; | |
char fname[256]; | |
sprintf(fname, "sgnmpy_%dx%d.v", Na, Nb); | |
fp = fopen(fname, "w"); | |
if (!fp) { | |
fprintf(stderr, "Could not open %s for writing\n", fname); | |
perror("O/S Err:"); | |
exit(-1); | |
} | |
sprintf(fname, "sgnmpy_%dx%d", Na, Nb); | |
buildmpy(fp, fname, Na, Nb, use_aux); | |
fclose(fp); | |
} | |
int main(int argc, char **argv) { | |
bool use_aux = true; | |
buildmpy(atoi(argv[1]), atoi(argv[2]), use_aux); | |
return(0); | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment