Created
January 24, 2020 02:12
-
-
Save rjmccall/562c2c7c9d289edd8cdf034edd6c1f17 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// A program to find two fixed-precision numbers which, when | |
// multiplied, yield the maximum representable value of the type | |
// plus a non-zero unrepresentable epsilon. | |
// | |
// Author: John McCall <rjmccall@gmail.com> | |
#include <stdio.h> | |
#include <err.h> | |
#include <iostream> | |
// unsigned fixed-precision number of width N and scale S | |
static const int N = 8; | |
static const int S = 4; | |
static const int MIN = 0; | |
static const int MAX = (1 << N) - 1; | |
typedef uint32_t Value; | |
typedef uint64_t Product; | |
int main(int argc, const char * const *argv) { | |
if (argc != 3) errx(1, "expected two arguments: a bit-width and a scale"); | |
char *end; | |
long N = strtol(argv[1], &end, 10); | |
if (*end != '\0') errx(1, "invalid bit-width"); | |
long S = strtol(argv[2], &end, 10); | |
if (*end != '\0') errx(1, "invalid scale"); | |
if (S < 0 || S > N) errx(1, "scale must be positive and less than bit-width"); | |
if (N > sizeof(Value) * CHAR_BIT) errx(1, "N is too big for this program"); | |
Value MIN = 0, MAX = (Value(1) << N) - 1; | |
Value DIVISOR = Value(1) << S, PRODUCT_DIVISOR = Value(1) << (2 * S); | |
// Iteerate over all possible values of A and B. | |
for (Value a = MIN; a <= MAX; ++a) { | |
// Avoid duplicate examples by setting B >= A. | |
for (Value b = a; b <= MAX; ++b) { | |
// A == a * 2^-S | |
// B == b * 2^-S | |
// A * B == a * b * 2^-2S | |
// So a * b is the true product multipled by 2^S. | |
Product scaledProduct = Product(a) * b; | |
// The product with representable precision. | |
Product truncatedProduct = scaledProduct >> S; | |
// We're looking for a product whose representable value is | |
// exactly the maximum... | |
if (truncatedProduct != MAX) continue; | |
// ...and there's non-zero lost precision. | |
Product lostPrecision = scaledProduct - (truncatedProduct << S); | |
if (lostPrecision == 0) continue; | |
std::cout << "a=" << a << "/" << DIVISOR | |
<< ", b=" << b << "/" << DIVISOR | |
<< ", product=" << truncatedProduct << "/" << DIVISOR | |
<< " + " << lostPrecision << "/" << PRODUCT_DIVISOR | |
<< "\n"; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment