Skip to content

Instantly share code, notes, and snippets.

@rjmccall
Created January 24, 2020 02:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rjmccall/562c2c7c9d289edd8cdf034edd6c1f17 to your computer and use it in GitHub Desktop.
Save rjmccall/562c2c7c9d289edd8cdf034edd6c1f17 to your computer and use it in GitHub Desktop.
// A program to find two fixed-precision numbers which, when
// multiplied, yield the maximum representable value of the type
// plus a non-zero unrepresentable epsilon.
//
// Author: John McCall <rjmccall@gmail.com>
#include <stdio.h>
#include <err.h>
#include <iostream>
// unsigned fixed-precision number of width N and scale S
static const int N = 8;
static const int S = 4;
static const int MIN = 0;
static const int MAX = (1 << N) - 1;
typedef uint32_t Value;
typedef uint64_t Product;
int main(int argc, const char * const *argv) {
if (argc != 3) errx(1, "expected two arguments: a bit-width and a scale");
char *end;
long N = strtol(argv[1], &end, 10);
if (*end != '\0') errx(1, "invalid bit-width");
long S = strtol(argv[2], &end, 10);
if (*end != '\0') errx(1, "invalid scale");
if (S < 0 || S > N) errx(1, "scale must be positive and less than bit-width");
if (N > sizeof(Value) * CHAR_BIT) errx(1, "N is too big for this program");
Value MIN = 0, MAX = (Value(1) << N) - 1;
Value DIVISOR = Value(1) << S, PRODUCT_DIVISOR = Value(1) << (2 * S);
// Iteerate over all possible values of A and B.
for (Value a = MIN; a <= MAX; ++a) {
// Avoid duplicate examples by setting B >= A.
for (Value b = a; b <= MAX; ++b) {
// A == a * 2^-S
// B == b * 2^-S
// A * B == a * b * 2^-2S
// So a * b is the true product multipled by 2^S.
Product scaledProduct = Product(a) * b;
// The product with representable precision.
Product truncatedProduct = scaledProduct >> S;
// We're looking for a product whose representable value is
// exactly the maximum...
if (truncatedProduct != MAX) continue;
// ...and there's non-zero lost precision.
Product lostPrecision = scaledProduct - (truncatedProduct << S);
if (lostPrecision == 0) continue;
std::cout << "a=" << a << "/" << DIVISOR
<< ", b=" << b << "/" << DIVISOR
<< ", product=" << truncatedProduct << "/" << DIVISOR
<< " + " << lostPrecision << "/" << PRODUCT_DIVISOR
<< "\n";
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment