Skip to content

Instantly share code, notes, and snippets.

@Eisenwave
Last active August 19, 2020 10:01
Show Gist options
  • Save Eisenwave/11372ace8dcc8334210fd6b45d0b7f91 to your computer and use it in GitHub Desktop.
Save Eisenwave/11372ace8dcc8334210fd6b45d0b7f91 to your computer and use it in GitHub Desktop.
THIS IS A MIRROR OF http://www.hackersdelight.org/HDCode/divlu.c WHIS IS NO LONGER VIEWABLE
/* Long division, unsigned (64/32 ==> 32).
This procedure performs unsigned "long division" i.e., division of a
64-bit unsigned dividend by a 32-bit unsigned divisor, producing a
32-bit quotient. In the overflow cases (divide by 0, or quotient
exceeds 32 bits), it returns a remainder of 0xFFFFFFFF (an impossible
value).
The dividend is u1 and u0, with u1 being the most significant word.
The divisor is parameter v. The value returned is the quotient.
Max line length is 57, to fit in hacker.book. */
#include <stdio.h>
#include <stdlib.h> //To define "exit", req'd by XLC.
#define max(x, y) ((x) > (y) ? (x) : (y))
int nlz(unsigned x) {
int n;
if (x == 0) return(32);
n = 0;
if (x <= 0x0000FFFF) {n = n +16; x = x <<16;}
if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;}
if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;}
if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;}
if (x <= 0x7FFFFFFF) {n = n + 1;}
return n;
}
unsigned myrand(void) { // Expands rand() to 32 bits.
return rand() ^ (rand() << 1);
}
/* This first version is Knuth's Algorithm D (my "divmnu") pretty much
mechanically reduced to the special case m = 4, n = 2. But some details
differ. The parameters are fullwords passed in registers, and it
returns an error indication if the result cannot be expressed in 32 bits
(divmnu would return a 3-halfword quotient). */
unsigned divlu(unsigned u1, unsigned u0, unsigned v, unsigned *r) {
const unsigned b = 65536; // Number base (16 bits).
unsigned short un[4], // Dividend and divisor
vn[2]; // normalized and broken
// up into halfwords.
unsigned short q[2]; // Quotient as halfwords.
unsigned un1, un0, // Dividend and divisor
vn0; // as fullwords.
unsigned qhat; // Estimated quotient digit.
unsigned rhat; // A remainder.
unsigned p; // Product of two digits.
int s, i, j, t, k;
if (u1 >= v) { // If overflow, set rem.
if (r != NULL) // to an impossible value,
*r = 0xFFFFFFFF; // and return the largest
return 0xFFFFFFFF;} // possible quotient.
s = nlz(v); // 0 <= s <= 31.
vn0 = v << s; // Normalize divisor.
vn[1] = vn0 >> 16; // Break divisor up into
vn[0] = vn0 & 0xFFFF; // two 16-bit halves.
// Shift dividend left.
un1 = (u1 << s) | (u0 >> 32 - s) & (-s >> 31);
un0 = u0 << s;
un[3] = un1 >> 16; // Break dividend up into
un[2] = un1; // four 16-bit halfwords
un[1] = un0 >> 16; // Note: storing into
un[0] = un0; // halfwords truncates.
for (j = 1; j >= 0; j--) {
// Compute estimate qhat of q[j].
qhat = (un[j+2]*b + un[j+1])/vn[1];
rhat = (un[j+2]*b + un[j+1]) - qhat*vn[1];
again:
if (qhat >= b || qhat*vn[0] > b*rhat + un[j]) {
qhat = qhat - 1;
rhat = rhat + vn[1];
if (rhat < b) goto again;
}
// Multiply and subtract.
k = 0;
for (i = 0; i < 2; i++) {
p = qhat*vn[i];
t = un[i+j] - k - (p & 0xFFFF);
un[i+j] = t;
k = (p >> 16) - (t >> 16);
}
t = un[j+2] - k;
un[j+2] = t;
q[j] = qhat; // Store quotient digit.
if (t < 0) { // If we subtracted too
q[j] = q[j] - 1; // much, add back.
printf("Doing add back, %08x %08x %08x\n", u1, u0, v);
k = 0;
for (i = 0; i < 2; i++) {
t = un[i+j] + vn[i] + k;
un[i+j] = t;
k = t >> 16;
}
un[j+2] = un[j+2] + k;
}
} // End j.
if (r != NULL) // If remainder is wanted,
*r = (un[1]*b + un[0]) >> s; // return it.
return q[1]*b + q[0];
}
/* This version is simplified from Knuth's because the adding back step
is never required, as the estimated quotient digit qhat is always exact.
Also, the inner loop is coded in a more efficient way (not possible in
divmnu), unrolled, and obvious resulting simplifications made. And, the
quotient and divisor are elementary variables (kept in registers) rather
than arrays. */
unsigned divlu1(unsigned u1, unsigned u0, unsigned v, unsigned *r) {
const unsigned b = 65536; // Number base (16 bits).
unsigned short un[4], // Dividend and divisor
vn1, vn0; // normalized and broken
// up into halfwords.
unsigned q; // Quotient.
unsigned un1, un0; // Dividend as fullwords.
unsigned qhat; // Estimated quotient digit.
unsigned rhat; // A remainder.
unsigned p; // Product of two digits.
int s, j, t, k;
if (u1 >= v) { // If overflow, set rem.
if (r != NULL) // to an impossible value,
*r = 0xFFFFFFFF; // and return the largest
return 0xFFFFFFFF;} // possible quotient.
s = nlz(v); // 0 <= s <= 31.
v = v << s; // Normalize divisor.
vn1 = v >> 16; // Break divisor up into
vn0 = v & 0xFFFF; // two 16-bit halves.
// Shift dividend left.
un1 = (u1 << s) | (u0 >> 32 - s) & (-s >> 31);
un0 = u0 << s;
un[3] = un1 >> 16; // Break dividend up into
un[2] = un1; // four 16-bit halfwords
un[1] = un0 >> 16; // Note: storing into
un[0] = un0; // halfwords truncates.
for (j = 1; j >= 0; j--) {
// Compute estimate qhat of current digit of q.
qhat = (un[j+2]*b + un[j+1])/vn1;
rhat = (un[j+2]*b + un[j+1]) - qhat*vn1;
again:
if (qhat >= b || qhat*vn0 > b*rhat + un[j]) {
qhat = qhat - 1;
rhat = rhat + vn1;
if (rhat < b) goto again;
}
q = q*b + qhat; // Put away quotient digit.
// Multiply and subtract.
p = qhat*vn0;
t = un[j] - p;
un[j] = t; // Store right half of t.
k = t >> 16;
p = qhat*vn1;
t = un[j+1] + k - p;
un[j+1] = t; // Store right half of t.
} // End j.
if (r != NULL) // If remainder is wanted,
*r = (un[1]*b + un[0]) >> s; // return it.
return q;
}
/* This version is divlu1 except with outer loop unrolled, and array un
changed into local variables. Several of the variables below could be
"short," but having them fullwords gives better code on gcc/Intel.
Statistics: Based on one million executions of this program, with
uniformly distributed random values for the dividend and divisor, the
number of times in each loop per execution of the program is:
again1: 0.4060
again2: 0.3469
This is the version that's in the hacker book. */
// ------------------------------ cut ----------------------------------
unsigned divlu2(unsigned u1, unsigned u0, unsigned v,
unsigned *r) {
const unsigned b = 65536; // Number base (16 bits).
unsigned un1, un0, // Norm. dividend LSD's.
vn1, vn0, // Norm. divisor digits.
q1, q0, // Quotient digits.
un32, un21, un10,// Dividend digit pairs.
rhat; // A remainder.
int s; // Shift amount for norm.
if (u1 >= v) { // If overflow, set rem.
if (r != NULL) // to an impossible value,
*r = 0xFFFFFFFF; // and return the largest
return 0xFFFFFFFF;} // possible quotient.
s = nlz(v); // 0 <= s <= 31.
v = v << s; // Normalize divisor.
vn1 = v >> 16; // Break divisor up into
vn0 = v & 0xFFFF; // two 16-bit digits.
un32 = (u1 << s) | (u0 >> 32 - s) & (-s >> 31);
un10 = u0 << s; // Shift dividend left.
un1 = un10 >> 16; // Break right half of
un0 = un10 & 0xFFFF; // dividend into two digits.
q1 = un32/vn1; // Compute the first
rhat = un32 - q1*vn1; // quotient digit, q1.
again1:
if (q1 >= b || q1*vn0 > b*rhat + un1) {
q1 = q1 - 1;
rhat = rhat + vn1;
if (rhat < b) goto again1;}
un21 = un32*b + un1 - q1*v; // Multiply and subtract.
q0 = un21/vn1; // Compute the second
rhat = un21 - q0*vn1; // quotient digit, q0.
again2:
if (q0 >= b || q0*vn0 > b*rhat + un0) {
q0 = q0 - 1;
rhat = rhat + vn1;
if (rhat < b) goto again2;}
if (r != NULL) // If remainder is wanted,
*r = (un21*b + un0 - q0*v) >> s; // return it.
return q1*b + q0;
}
// ------------------------------ cut ----------------------------------
int errors;
void check(char *name, unsigned q, unsigned r, unsigned u1, unsigned u0, unsigned v,
unsigned cq, unsigned cr) {
if (q != cq || r != cr) {
errors = errors + 1;
printf("Error in %s, dividend u = %08x %08x\n", name, u1, u0);
printf(" divisor v = %08x\n", v);
printf("Got q = %08x, r = %08x\n", q, r);
printf("Should get cq = %08x, cr = %08x\n", cq, cr);
}
return;
}
int main() {
static unsigned test[] = {
// u..., v...,
0, 0, 0, // Overflow.
1, 0, 0, // Overflow.
1, 0, 1, // Overflow.
0xffffffff, 0, 0xffffffff, // Overflow.
0, 1, 1,
0, 7, 3,
0x00000000,0xffffffff, 0x00000001,
0x01234567,0x89abcdef, 0x01234568,
0x01234567,0x89abcdef, 0x12345678,
0x01234567,0x89abcdef, 0x80000000,
0x01234567,0x89abcdef, 0x80000001,
0x01234567,0x89abcdef, 0x80007fff,
0x01234567,0x89abcdef, 0x80008000,
0x01234567,0x89abcdef, 0x80008001,
0x01234567,0x89abcdef, 0x8000ffff,
0x00000000,0x00000000, 0x0000ffff,
0x00000000,0x0000ffff, 0x0000ffff,
0x00000000,0xffff0000, 0x0000ffff,
0x00000000,0xffffffff, 0x0000ffff,
0x0000fffe,0x00000000, 0x0000ffff,
0x0000fffe,0x0000ffff, 0x0000ffff,
0x0000fffe,0xffff0000, 0x0000ffff,
0x0000fffe,0xffffffff, 0x0000ffff,
0x00000000,0x00000000, 0xffff0000,
0x00000000,0x0000ffff, 0xffff0000,
0x00000000,0xffff0000, 0xffff0000,
0x00000000,0xffffffff, 0xffff0000,
0x0000ffff,0x00000000, 0xffff0000,
0x0000ffff,0x0000ffff, 0xffff0000,
0x0000ffff,0xffff0000, 0xffff0000,
0x0000ffff,0xffffffff, 0xffff0000,
0xfffe0000,0x00000000, 0xffff0000,
0xfffe0000,0x0000ffff, 0xffff0000,
0xfffe0000,0xffff0000, 0xffff0000,
0xfffe0000,0xffffffff, 0xffff0000,
0xfffeffff,0x00000000, 0xffff0000,
0xfffeffff,0x0000ffff, 0xffff0000,
0xfffeffff,0xffff0000, 0xffff0000,
0xfffeffff,0xffffffff, 0xffff0000,
0x00000000,0x00000000, 0xffffffff,
0x00000000,0x0000ffff, 0xffffffff,
0x00000000,0xffff0000, 0xffffffff,
0x00000000,0xffffffff, 0xffffffff,
0x0000ffff,0x00000000, 0xffffffff,
0x0000ffff,0x0000ffff, 0xffffffff,
0x0000ffff,0xffff0000, 0xffffffff,
0x0000ffff,0xffffffff, 0xffffffff,
0xffff0000,0x00000000, 0xffffffff,
0xffff0000,0x0000ffff, 0xffffffff,
0xffff0000,0xffff0000, 0xffffffff,
0xffff0000,0xffffffff, 0xffffffff,
0xfffffffe,0x00000000, 0xffffffff,
0xfffffffe,0x0000ffff, 0xffffffff,
0xfffffffe,0xffff0000, 0xffffffff,
0xfffffffe,0xffffffff, 0xffffffff,
0x12345678,0x9abcdef0, 0x12345679,
0x00010000,0x00000000, 0x00010001,
0x80008000,0x00000000, 0x80008001, // First qhat = b + 1.
0x8000fffe,0xffffffff, 0x8000ffff, // First qhat = b + 1.
0x7fff8000,0x00000000, 0x8000ffff, // First qhat too big by 2.
};
int i, n;
unsigned u1, u0, v, q, r, cq, cr;
n = sizeof(test)/12 + 10;
for (i = 0; i < n; i++) {
if (i < sizeof(test)/12) {
u1 = test[3*i]; u0 = test[3*i+1]; v = test[3*i+2];
}
else {
u1 = myrand(); u0 = myrand(); v = myrand();
if (v == 0) v = 1;
if (u1 >= v) u1 = u1 & (v - 1);
}
if (i <= 3) { // If intentional overflow case:
cq = 0xFFFFFFFF;;
cr = 0xFFFFFFFF;
}
else { // Non-overflow case:
cq = ((unsigned long long)u1*0x100000000LL + u0)/v;
cr = ((unsigned long long)u1*0x100000000LL + u0) - (unsigned long long)cq*v;
}
// printf("u = %08x %08x v = %08x cq = %08x cr = %08x\r\n", u1, u0, v, cq, cr);
q = divlu(u1, u0, v, &r);
check("divlu", q, r, u1, u0, v, cq, cr);
q = divlu1(u1, u0, v, &r);
check("divlu1", q, r, u1, u0, v, cq, cr);
q = divlu2(u1, u0, v, &r);
check("divlu2", q, r, u1, u0, v, cq, cr);
}
if (errors == 0)
printf("Passed all %d cases.\n", n);
else
printf("Got %d errors out of %d cases.\n", errors, n);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment