rjmccall/fixed_point_epsilon_overflow.cpp

## fixed_point_epsilon_overflow.cpp
// A program to find two fixed-precision numbers which, when
// multiplied, yield the maximum representable value of the type
// plus a non-zero unrepresentable epsilon.
//
// Author: John McCall <rjmccall@gmail.com>

#include <stdio.h>
#include <err.h>
#include <iostream>

// unsigned fixed-precision number of width N and scale S
static const int N = 8;
static const int S = 4;

static const int MIN = 0;
static const int MAX = (1 << N) - 1;

typedef uint32_t Value;
typedef uint64_t Product;

int main(int argc, const char * const *argv) {
  if (argc != 3) errx(1, "expected two arguments: a bit-width and a scale");

  char *end;
  long N = strtol(argv[1], &end, 10);
  if (*end != '\0') errx(1, "invalid bit-width");
  long S = strtol(argv[2], &end, 10);
  if (*end != '\0') errx(1, "invalid scale");
  if (S < 0 || S > N) errx(1, "scale must be positive and less than bit-width");
  if (N > sizeof(Value) * CHAR_BIT) errx(1, "N is too big for this program");

  Value MIN = 0, MAX = (Value(1) << N) - 1;
  Value DIVISOR = Value(1) << S, PRODUCT_DIVISOR = Value(1) << (2 * S);

  // Iteerate over all possible values of A and B.
  for (Value a = MIN; a <= MAX; ++a) {
    // Avoid duplicate examples by setting B >= A.
    for (Value b = a; b <= MAX; ++b) {
      // A == a * 2^-S
      // B == b * 2^-S
      // A * B == a * b * 2^-2S

      // So a * b is the true product multipled by 2^S.
      Product scaledProduct = Product(a) * b;

      // The product with representable precision.
      Product truncatedProduct = scaledProduct >> S;

      // We're looking for a product whose representable value is
      // exactly the maximum...
      if (truncatedProduct != MAX) continue;

      // ...and there's non-zero lost precision.
      Product lostPrecision = scaledProduct - (truncatedProduct << S);
      if (lostPrecision == 0) continue;

      std::cout << "a=" << a << "/" << DIVISOR
                << ", b=" << b << "/" << DIVISOR
                << ", product=" << truncatedProduct <<  "/" << DIVISOR
                << " + " << lostPrecision << "/" << PRODUCT_DIVISOR
                << "\n";
    }
  }
}
	// A program to find two fixed-precision numbers which, when
	// multiplied, yield the maximum representable value of the type
	// plus a non-zero unrepresentable epsilon.
	//
	// Author: John McCall <rjmccall@gmail.com>

	#include <stdio.h>
	#include <err.h>
	#include <iostream>

	// unsigned fixed-precision number of width N and scale S
	static const int N = 8;
	static const int S = 4;

	static const int MIN = 0;
	static const int MAX = (1 << N) - 1;

	typedef uint32_t Value;
	typedef uint64_t Product;

	int main(int argc, const char * const *argv) {
	if (argc != 3) errx(1, "expected two arguments: a bit-width and a scale");

	char *end;
	long N = strtol(argv[1], &end, 10);
	if (*end != '\0') errx(1, "invalid bit-width");
	long S = strtol(argv[2], &end, 10);
	if (*end != '\0') errx(1, "invalid scale");
	if (S < 0 \|\| S > N) errx(1, "scale must be positive and less than bit-width");
	if (N > sizeof(Value) * CHAR_BIT) errx(1, "N is too big for this program");

	Value MIN = 0, MAX = (Value(1) << N) - 1;
	Value DIVISOR = Value(1) << S, PRODUCT_DIVISOR = Value(1) << (2 * S);

	// Iteerate over all possible values of A and B.
	for (Value a = MIN; a <= MAX; ++a) {
	// Avoid duplicate examples by setting B >= A.
	for (Value b = a; b <= MAX; ++b) {
	// A == a * 2^-S
	// B == b * 2^-S
	// A * B == a * b * 2^-2S

	// So a * b is the true product multipled by 2^S.
	Product scaledProduct = Product(a) * b;

	// The product with representable precision.
	Product truncatedProduct = scaledProduct >> S;

	// We're looking for a product whose representable value is
	// exactly the maximum...
	if (truncatedProduct != MAX) continue;

	// ...and there's non-zero lost precision.
	Product lostPrecision = scaledProduct - (truncatedProduct << S);
	if (lostPrecision == 0) continue;

	std::cout << "a=" << a << "/" << DIVISOR
	<< ", b=" << b << "/" << DIVISOR
	<< ", product=" << truncatedProduct << "/" << DIVISOR
	<< " + " << lostPrecision << "/" << PRODUCT_DIVISOR
	<< "\n";
	}
	}
	}