jepers/float8.swift

## float8.swift
// ============================================================================
// This is an attempt at implementing a Float8 type by Jens Persson.
// It seems to work.
// This file is in the form of a command line program which will
// do some basic checks and print all Float8 values.
// ---------------------------------------------------------------------------
// Use it in any way you like, please let me know of any issues or
// improvements here: https://forums.swift.org/t/33337/38
// ===========================================================================


// ----------------------------------------------------------------------------
// About Avoiding Accidental Infinite Recursion.
// ----------------------------------------------------------------------------
// When implementing something like `Float8`, it's easy to cause unintentional
// infinite recursion, especially in the presence of default implementations,
// and when "cheating" by eg converting to `Float`, doing some work, and
// then converting the result back to `Float8`, as discussed here:
// https://forums.swift.org/t/33337/8
// https://forums.swift.org/t/33337/9
//
// So we'll implement eg `Float8.init(_ value: Float)` ourselves, rather than
// using the default implementation, to avoid the risk of infinite recursion
// now or in the future. And our implementation must not call any member of
// `Float8` that might result in a call back to it. Members that have to be
// avoided (depending on how we implement them) might include operators and
// literal initializers, which might be tricky to spot or remember:
// `let a: Float8 = -0.0` and `someFloat8 = 0` and `someFloat8 = -someOtherF8`.
//
// ----------------------------------------------------------------------------
// But anyway, to summarize, and to keep it relatively simple and manageable:
// ----------------------------------------------------------------------------
//
// * All members (of `Float8`) are separated into "layers"(/extensions).
//
// * A member defined in layer N is only allowed to call members defined in
//   layers below N, ie: A member of layer N must not call any member in
//   layer >= N.
//
// * Checking for disallowed calls can be done manually by commenting out all
//   but the checked member within the same layer, and commenting out all
//   higher layers. Or I guess it could be automated in some way.
//
// * But note that these precautions won't help with the problem of default
//   implementations. We must identify and implement all of these ourselves.
//
// ----------------------------------------------------------------------------


import Darwin


// ----------------------------------------------------------------------------
// MARK: - Member Layer 0
// ----------------------------------------------------------------------------

/// An 8-bit binary floating point type with 1 sign bit, 4 exponent bits and
/// 3 significand bits.
///
/// The author describes this as an amateur effort that might not work as
/// expected, although no issues had been found so far. It was put together by
/// looking at:
/// * https://en.wikipedia.org/wiki/Single-precision_floating-point_format
/// * http://www.cs.jhu.edu/~jorgev/cs333/readings/8-Bit_Floating_Point.pdf
/// * https://raw.githubusercontent.com/apple/swift/master/stdlib/public/core/FloatingPointTypes.swift.gyb
/// and it piggybacks on `Float32` as much as possible while trying to avoid
/// the risk of infinite recursion.
///
/// ```
/// Exponent bias: 7
/// Exponent bit pattern:   0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
///             Exponent: sub -6 -5 -4 -3 -2 -1  0  1  2  3  4  5  6  7 inf/nan
///
/// 0_0000_001 = 0x01 = 2**(-6) * (0 + 1/8) =   0.001953125 (least nonzero magnitude)
/// 0_0000_111 = 0x07 = 2**(-6) * (0 + 7/8) =   0.013671875 (greatest subnormal magnitude)
/// 0_0001_000 = 0x08 = 2**(-6) * (1 + 0/8) =   0.015625 (least normal nonzero magnitude)
/// 0_0111_000 = 0x38 = 2**( 0) * (1 + 0/8) =   1.0
/// 0_1110_111 = 0x77 = 2**( 7) * (1 + 7/8) = 240.0 (greatest finite magnitude)
/// ```
struct Float8 {
    /// The raw encoding, from most to least significant bit: sign,
    /// 4 exponent bits, 3 significand bits.
    private(set) var bitPattern: UInt8

    /// Creates a value directly from its raw 8-bit encoding.
    init(bitPattern bits: UInt8) {
        bitPattern = bits
    }
}


// ----------------------------------------------------------------------------
// MARK: - Member Layer 1
// ----------------------------------------------------------------------------
extension Float8 {
    // Associated types used by the floating point protocols this type
    // conforms to further down in the file.
    typealias Exponent = Int
    typealias RawSignificand = UInt8
    typealias RawExponent = UInt
    typealias Stride = Float8
    typealias Magnitude = Float8
    typealias FloatLiteralType = Float32
    typealias IntegerLiteralType = Int64

    /// Width of the exponent field in bits.
    static var exponentBitCount: Int { 4 }

    /// Width of the stored significand field in bits.
    static var significandBitCount: Int { 3 }

    /// The exponent bias, ie `(1 << (exponentBitCount - 1)) - 1`.
    static var _exponentBias: UInt { 7 }

    /// A NaN encoding: exponent field all ones, significand field `110`.
    static var nan: Float8 { Float8(bitPattern: 0b0_1111_110) }

    /// A NaN encoding: exponent field all ones, significand field `010`.
    static var signalingNaN: Float8 { Float8(bitPattern: 0b0_1111_010) }

    /// Positive infinity: exponent field all ones, significand field zero.
    static var infinity: Float8 { Float8(bitPattern: 0b0_1111_000) }

    /// Negative infinity: as `infinity`, with the sign bit set.
    static var _negativeInfinity: Float8 { Float8(bitPattern: 0b1_1111_000) }

    /// The exponent field value (all ones) of the infinity encodings above.
    ///
    /// Exposed as a read-only computed property, like its neighbours, so this
    /// constant cannot be reassigned at run time.
    static var _infinityExponent: UInt { 0b1111 }

    /// Mask selecting the three significand bits of a bit pattern.
    ///
    /// Read-only for the same reason as `_infinityExponent`.
    static var _significandMask: UInt8 { 0b111 }

    /// Positive zero (all bits clear).
    static var zero: Float8 { Float8(bitPattern: 0) }

    /// Negative zero (only the sign bit set).
    static var _negativeZero: Float8 { Float8(bitPattern: 0b1_0000_000) }

    /// 1.0
    static var one: Float8 { Float8(bitPattern: 0b0_0111_000) }

    /// 0.015625
    static var leastNormalMagnitude: Float8 {
        Float8(bitPattern: 0b0_0001_000)
    }

    /// 0.001953125
    static var leastNonzeroMagnitude: Float8 {
        Float8(bitPattern: 0b0_0000_001)
    }

    /// 240.0
    static var greatestFiniteMagnitude: Float8 {
        Float8(bitPattern: 0b0_1110_111)
    }

    /// The mathematical constant pi approximated by the closest representable
    /// `Float8` value less than pi, which is `3.0`.
    static var pi: Float8 { Float8(bitPattern: 0b0_1000_100) }

    /// The four exponent bits (bits 3...6 of `bitPattern`), right-aligned.
    var exponentBitPattern: UInt { UInt((bitPattern &>> 3) & 0b1111) }

    /// The three significand bits (bits 0...2 of `bitPattern`).
    var significandBitPattern: UInt8 { bitPattern & 0b111 }

    /// `.minus` when the most significant bit is set, otherwise `.plus`.
    var sign: FloatingPointSign { bitPattern & 128 == 0 ? .plus : .minus }

    /// Always `true`.
    var isCanonical: Bool { true }

    /// `true` for both `+0` and `-0` (every bit except the sign bit is clear).
    var isZero: Bool {
        bitPattern & 0b0_1111_111 == 0
    }

    /// Negation: flips the sign bit and leaves all other bits untouched.
    static prefix func -(lhs: Float8) -> Float8 {
        // Author's note: the corresponding implementation has been verified
        // to be valid for all bit patterns of `Float32`.
        return Float8(bitPattern: lhs.bitPattern ^ 0b1_0000_000)
    }
}


// ----------------------------------------------------------------------------
// MARK: - Member Layer 3
// ----------------------------------------------------------------------------

extension Float8 {
    /// Mask selecting the most significant bit of the significand field.
    /// `isSignalingNaN` treats a NaN with this bit clear as signaling.
    private static var _quietNaNMask: UInt8 {
        let topSignificandBit = UInt8(significandBitCount - 1)
        return 1 &<< topSignificandBit
    }

    /// `true` unless the exponent field is all ones.
    var isFinite: Bool {
        let allOnesExponent: UInt = (1 << Self.exponentBitCount) &- 1
        return exponentBitPattern < allOnesExponent
    }

}


// ----------------------------------------------------------------------------
// MARK: - Member Layer 4
// ----------------------------------------------------------------------------

extension Float8 {

    /// `true` for finite values with a nonzero exponent field.
    var isNormal: Bool {
        guard exponentBitPattern > 0 else { return false }
        return isFinite
    }

    /// `true` for nonzero values whose exponent field is zero.
    var isSubnormal: Bool {
        guard exponentBitPattern == 0 else { return false }
        return significandBitPattern != 0
    }

    /// `true` when the value is not finite and the significand field is zero.
    var isInfinite: Bool {
        if isFinite { return false }
        return significandBitPattern == 0
    }

    /// `true` when the value is not finite and the significand field is
    /// nonzero.
    var isNaN: Bool {
        if isFinite { return false }
        return significandBitPattern != 0
    }

}


// ----------------------------------------------------------------------------
// MARK: - Member Layer 5
// ----------------------------------------------------------------------------

extension Float8 {
    /// `true` for NaNs whose quiet bit (see `_quietNaNMask`) is clear.
    var isSignalingNaN: Bool {
        guard isNaN else { return false }
        return (significandBitPattern & Self._quietNaNMask) == 0
    }


    /// The unbiased binary exponent of the value.
    ///
    /// Returns `Int.max` for infinities and NaNs and `Int.min` for zeros.
    /// For subnormals the result accounts for the position of the highest set
    /// significand bit.
    var exponent: Int {
        guard isFinite else { return .max }
        guard !isZero else { return .min }
        let unbiased = Int(exponentBitPattern) - Int(Self._exponentBias)
        guard !isNormal else { return unbiased }
        // Subnormal: count how far the leading set bit sits below the
        // implicit-bit position.
        let highestSetBit = significandBitPattern._binaryLogarithm()
        let shift = Self.significandBitCount - highestSetBit
        return unbiased + 1 - shift
    }

}


// ----------------------------------------------------------------------------
// MARK: - Member Layer 6
// ----------------------------------------------------------------------------

extension Float8 {

    /// Assembles a value from its three fields.
    ///
    /// Only the low 4 bits of `exponentBitPattern` and the low 3 bits of
    /// `significandBitPattern` are used; any higher bits are discarded.
    init(sign: FloatingPointSign,
         exponentBitPattern: UInt,
         significandBitPattern: UInt8)
    {
        let signField: UInt8
        switch sign {
        case .minus: signField = Float8._negativeZero.bitPattern
        case .plus: signField = Float8.zero.bitPattern
        }
        let lowExponentBits =
            UInt8(truncatingIfNeeded: exponentBitPattern & 0b1111)
        let exponentField = lowExponentBits &<< Self.significandBitCount
        let significandField = significandBitPattern & Self._significandMask
        self.init(bitPattern: signField | exponentField | significandField)
    }

    /// The least representable value that compares greater than this value.
    /// NaNs and positive infinity are returned unchanged (a signaling NaN is
    /// first combined with the `nan` bit pattern).
    var nextUp : Float8 {
        // Author's note: verified to work the same as the standard library
        // implementation for `Float`.
        // Silence signaling NaNs and map -0 to +0 first. (`let x = v + 0`
        // can't be used here.)
        var start = self
        if start.isSignalingNaN {
            start = Float8(bitPattern: start.bitPattern | Float8.nan.bitPattern)
        } else if start.isZero {
            start = .zero
        }
        guard start < Float8.infinity else { return start }
        // The arithmetic shift turns the sign bit into 0 or -1; OR-ing in 1
        // yields +1 for non-negative and -1 for negative values, so the raw
        // encoding always moves towards positive infinity.
        let step = Int8(bitPattern: start.bitPattern) &>> 7 | 1
        let steppedBits = start.bitPattern &+ UInt8(bitPattern: step)
        return Float8(bitPattern: steppedBits)
    }

    /// The greatest representable value that compares less than this value,
    /// computed by mirroring `nextUp` around zero.
    public var nextDown: Float8 {
        let mirrored = -self
        return -mirrored.nextUp
    }

}


// ----------------------------------------------------------------------------
// MARK: - Member Layer 7
// ----------------------------------------------------------------------------

extension Float8 {

    /// Converts an integer to the nearest `Float8`.
    ///
    /// - Parameter source: The integer to convert.
    /// - Returns: `value` is the converted result; halfway cases are rounded
    ///   up only when the truncated significand is odd. Magnitudes whose
    ///   exponent exceeds that of `greatestFiniteMagnitude` (also after
    ///   rounding) become an infinity of the appropriate sign. `exact` is
    ///   `true` when no nonzero bits were discarded and the result is not an
    ///   infinity.
    private static func _convert<Source: BinaryInteger>(from source: Source)
        -> (value: Self, exact: Bool)
    {
        // --------------------------------------------------------------------
        // Copied with modifications from stdlib impl in FloatingPoint.swift
        // --------------------------------------------------------------------
        // Zero is really extra simple, and saves us from trying to normalize
        // a value that cannot be normalized.
        if _fastPath(source == 0) { return (Self.zero, true) }
        // We now have a non-zero value; convert it to a strictly positive
        // value by taking the magnitude.
        let magnitude = source.magnitude
        var exponent = magnitude._binaryLogarithm()
        // If the exponent would be larger than the largest representable
        // exponent, the result is just an infinity of the appropriate sign.
        guard exponent <= Self.greatestFiniteMagnitude.exponent else {
            return (
                Source.isSigned && source < 0 ? ._negativeInfinity : .infinity,
                false
            )
        }
        // If exponent <= significandBitCount, we don't need to round it to
        // construct the significand; we just need to left-shift it into place;
        // the result is always exact as we've accounted for exponent-too-large
        // already and no rounding can occur.
        if exponent <= Self.significandBitCount {
            let shift = Self.significandBitCount &- exponent
            // The leading (implicit) bit ends up above the significand field
            // and is masked away by the memberwise initializer below.
            let significand = RawSignificand(magnitude) &<< shift
            let value = Self(
                sign: Source.isSigned && source < 0 ? .minus : .plus,
                exponentBitPattern: Self._exponentBias + RawExponent(exponent),
                significandBitPattern: significand
            )
            return (value, true)
        }
        // exponent > significandBitCount, so we need to do a rounding right
        // shift, and adjust exponent if needed
        let shift = exponent &- Self.significandBitCount
        let halfway = (1 as Source.Magnitude) << (shift - 1)
        let mask = 2 * halfway - 1
        let fraction = magnitude & mask
        var significand =
            RawSignificand(truncatingIfNeeded: magnitude >> shift)
                & Self._significandMask
        // Round up when more than half an ulp was discarded, or exactly half
        // and the kept significand is odd.
        if fraction > halfway || (fraction == halfway && significand & 1 == 1) {
            var carry = false
            (significand, carry) = significand.addingReportingOverflow(1)
            // Rounding overflowed the significand field: bump the exponent.
            // The overflowed bit is masked away by the initializer below.
            if carry || significand > Self._significandMask {
                exponent += 1
                guard exponent <= Self.greatestFiniteMagnitude.exponent else {
                    return (Source.isSigned && source < 0
                        ? ._negativeInfinity
                        : .infinity, false)
                }
            }
        }
        return (Self(
            sign: Source.isSigned && source < 0 ? .minus : .plus,
            exponentBitPattern: Self._exponentBias + RawExponent(exponent),
            significandBitPattern: significand
        ), fraction == 0)
    }


    /// Converts a value of another binary floating point type to the closest
    /// `Float8`.
    ///
    /// - Precondition: `Source` must not be `Float8` itself.
    /// - Parameter source: The value to convert.
    /// - Returns: `value` is the converted result and `exact` tells whether
    ///   the conversion was exact. Zeros and infinities keep their sign and
    ///   are exact; NaNs are never exact. Finite values that are too small
    ///   become a zero or subnormal, and values whose exponent exceeds that
    ///   of `greatestFiniteMagnitude` become an infinity.
    static func _convert<Source: BinaryFloatingPoint>(from source: Source)
        -> (value: Self, exact: Bool)
    {
        // --------------------------------------------------------------------
        // Copied with modifications from stdlib impl in FloatingPoint.swift
        // --------------------------------------------------------------------
        // NOTE: The stdlib implementation has/had a bug:
        // https://forums.swift.org/t/33337/31
        // The following code has the fix:
        precondition(Source.self != Self.self)
        guard _fastPath(!source.isZero) else {
            return (source.sign == .minus
                ? ._negativeZero
                : .zero, true)
        }
        guard _fastPath(source.isFinite) else {
            if source.isInfinite {
                return (source.sign == .minus
                    ? ._negativeInfinity
                    : .infinity, true)
            }
            // IEEE 754 requires that any NaN payload be propagated,
            // if possible.
            let payload_ =
                source.significandBitPattern &
                    ~(Source.nan.significandBitPattern |
                        Source.signalingNaN.significandBitPattern)
            let mask =
                Self.greatestFiniteMagnitude.significandBitPattern &
                    ~(Self.nan.significandBitPattern |
                        Self.signalingNaN.significandBitPattern)
            let payload = RawSignificand(truncatingIfNeeded: payload_) & mask
            // Although
            // .signalingNaN.exponentBitPattern == .nan.exponentBitPattern,
            // we do not *need* to rely on this relation, and therefore we
            // do not.
            let value = source.isSignalingNaN
                ? Self(
                    sign: source.sign,
                    exponentBitPattern: Self.signalingNaN.exponentBitPattern,
                    significandBitPattern: payload |
                        Self.signalingNaN.significandBitPattern)
                : Self(
                    sign: source.sign,
                    exponentBitPattern: Self.nan.exponentBitPattern,
                    significandBitPattern: payload |
                        Self.nan.significandBitPattern)
            // We define exactness by equality after roundtripping; since NaN
            // is never equal to itself, it can never be converted exactly.
            return (value, false)
        }

        // From here on `source` is finite and nonzero.
        let exponent = source.exponent
        var exemplar = Self.leastNormalMagnitude
        let exponentBitPattern: Self.RawExponent
        let leadingBitIndex: Int
        let shift: Int
        let significandBitPattern: Self.RawSignificand

        if exponent < exemplar.exponent {
            // The floating-point result is either zero or subnormal.
            exemplar = Self.leastNonzeroMagnitude
            let minExponent = exemplar.exponent
            if exponent + 1 < minExponent {
                return (source.sign == .minus ? ._negativeZero : .zero, false)
            }
            if _slowPath(exponent + 1 == minExponent) {
                // Although the most significant bit (MSB) of a subnormal
                // source significand is explicit, Swift BinaryFloatingPoint
                // APIs actually omit any explicit MSB from the count
                // represented in significandWidth. For instance:
                //
                //   Double.leastNonzeroMagnitude.significandWidth == 0
                //
                // Therefore, we do not need to adjust our work here for a
                // subnormal source.
                return source.significandWidth == 0
                    ? (source.sign == .minus ? ._negativeZero : .zero, false)
                    : (source.sign == .minus
                        ? Self(bitPattern: exemplar.bitPattern | 0b10000000)
                        : exemplar, false)
            }

            exponentBitPattern = 0 as Self.RawExponent
            leadingBitIndex = Int(Self.Exponent(exponent) - minExponent)
            shift =
                leadingBitIndex &-
                (source.significandWidth &+
                    source.significandBitPattern.trailingZeroBitCount)
            let leadingBit = source.isNormal
                ? (1 as Self.RawSignificand) << leadingBitIndex
                : 0
            significandBitPattern = leadingBit | (shift >= 0
                ? Self.RawSignificand(source.significandBitPattern) << shift
                : Self.RawSignificand(source.significandBitPattern >> -shift))
        } else {
            // The floating-point result is either normal or infinite.
            exemplar = Self.greatestFiniteMagnitude
            if exponent > exemplar.exponent {
                return (source.sign == .minus ? ._negativeInfinity : .infinity,
                        false)
            }

            exponentBitPattern = exponent < 0
                ? (1 as Self).exponentBitPattern - Self.RawExponent(-exponent)
                : (1 as Self).exponentBitPattern + Self.RawExponent(exponent)
            leadingBitIndex = exemplar.significandWidth
            shift =
                leadingBitIndex &-
                (source.significandWidth &+
                    source.significandBitPattern.trailingZeroBitCount)
            let sourceLeadingBit = source.isSubnormal
                ? (1 as Source.RawSignificand) <<
                    (source.significandWidth &+
                        source.significandBitPattern.trailingZeroBitCount)
                : 0
            significandBitPattern = shift >= 0
                ? Self.RawSignificand(
                    sourceLeadingBit ^ source.significandBitPattern) << shift
                : Self.RawSignificand(
                    (sourceLeadingBit ^ source.significandBitPattern) >> -shift)
        }

        // The truncated (not yet rounded) result.
        let value = Self(
            sign: source.sign,
            exponentBitPattern: exponentBitPattern,
            significandBitPattern: significandBitPattern)

        if source.significandWidth <= leadingBitIndex {
            return (value, true)
        }
        // We promise to round to the closest representation, and if two
        // representable values are equally close, the value with more trailing
        // zeros in its significand bit pattern. Therefore, we must take a look
        // at the bits that we've just truncated.
        let ulp = (1 as Source.RawSignificand) << -shift
        let truncatedBits = source.significandBitPattern & (ulp - 1)
        if truncatedBits < ulp / 2 {
            return (value, false)
        }
        // Candidate one step further away from zero.
        let rounded = source.sign == .minus ? value.nextDown : value.nextUp
        guard _fastPath(
            truncatedBits != ulp / 2 ||
                significandBitPattern.trailingZeroBitCount <
                rounded.significandBitPattern.trailingZeroBitCount)
            else { return (value, false) }
        return (rounded, false)
    }

}


// ----------------------------------------------------------------------------
// MARK: - Member Layer 8
// ----------------------------------------------------------------------------
extension Float8 {

    /// Creates the `Float8` closest to the given integer (see `_convert`).
    init<Source: BinaryInteger>(_ value: Source) {
        let converted = Self._convert(from: value)
        self = converted.value
    }

    /// Creates a `Float8` from the given integer, or fails when the
    /// conversion is not exact.
    init?<Source: BinaryInteger>(exactly value: Source) {
        let converted = Self._convert(from: value)
        if !converted.exact { return nil }
        self = converted.value
    }

    /// Integer literal support; goes through the same integer conversion.
    init(integerLiteral value: Int64) {
        let converted = Self._convert(from: value)
        self = converted.value
    }

    /// Creates the `Float8` closest to the given floating point value
    /// (see `_convert`).
    init<Source: BinaryFloatingPoint>(_ value: Source) {
        let converted = Self._convert(from: value)
        self = converted.value
    }

    /// Creates a `Float8` from the given floating point value, or fails when
    /// the conversion is not exact.
    init?<Source: BinaryFloatingPoint>(exactly value: Source) {
        let converted = Self._convert(from: value)
        if !converted.exact { return nil }
        self = converted.value
    }

}


// ----------------------------------------------------------------------------
// MARK: - Member Layer 9
// ----------------------------------------------------------------------------
extension Float8 {
    /// Creates a value from a sign, an exponent and a significand by building
    /// the corresponding `Float` and converting that to `Float8`.
    init(sign: FloatingPointSign, exponent: Int, significand: Float8) {
        let widened = Float(sign: sign,
                            exponent: exponent,
                            significand: significand.float)
        self.init(widened)
    }

    /// Float literal support; forwards to the generic floating point
    /// conversion initializer.
    init(floatLiteral value: Float) {
        // Author's note: there used to be an infinite recursion here for eg
        // `Float8(-Float(0))` (but not for `Float8(-Float(1))` or
        // `Float8(Float(0))`), which was once worked around by special-casing
        // negative zero:
        // if value == -Float(0) { self.init(bitPattern: 0b1_0000_000) }
        // else { self.init(value) }
        // It is an open question whether there are more such cases.
        self.init(value)
    }

    /// This value as a `Float`.
    ///
    /// Used throughout the rest of the file to promote to `Float`, do the
    /// arithmetic there and convert the result back. `Float.init(self)` is
    /// deliberately not used, since there is no control over which members of
    /// `Float8` it calls (now or in the future), so the conversion is spelled
    /// out by hand.
    var float: Float {
        guard isFinite else {
            if isInfinite {
                return sign == .minus ? -Float.infinity : Float.infinity
            }
            // NaN: carry over the payload bits that are not used to tell
            // `nan` and `signalingNaN` apart, plus the sign.
            let markerBits8 = Float8.nan.significandBitPattern |
                Float8.signalingNaN.significandBitPattern
            let payload8 = significandBitPattern & ~markerBits8
            let markerBits32 = Float.nan.significandBitPattern |
                Float.signalingNaN.significandBitPattern
            let payloadMask32 =
                Float.greatestFiniteMagnitude.significandBitPattern &
                    ~markerBits32
            let payload32 = UInt32(payload8) & payloadMask32
            let baseBits = isSignalingNaN
                ? Float.signalingNaN.bitPattern
                : Float.nan.bitPattern
            let signBit: UInt32 = sign == .minus ? UInt32(1) &<< UInt32(31) : 0
            return Float(bitPattern: baseBits | payload32 | signBit)
        }
        // Finite: sign * 2^exponent * (implicit bit + fraction).
        // Zeros and subnormals have no implicit leading one, and subnormals
        // use an exponent one higher than the raw field suggests.
        let hasImplicitOne = !(isZero || isSubnormal)
        let implicitBit: Float = hasImplicitOne ? 1.0 : 0.0
        let rawPower = Float(exponentBitPattern) - Float(Self._exponentBias)
        let power = isSubnormal ? rawPower + 1 : rawPower
        let fraction: Float = Float(bitPattern & 0b111) / 8.0
        let signFactor = sign == .minus ? -Float(1) : Float(1)
        return signFactor * powf(Float(2), power) * (implicitBit + fraction)
    }

    /// The difference `other - self`, computed in `Float` and converted back.
    func distance(to other: Float8) -> Float8 {
        let difference = other.float - float
        return Float8(difference)
    }

    /// The sum `self + n`, computed in `Float` and converted back.
    func advanced(by n: Float8) -> Float8 {
        let sum = float + n.float
        return Float8(sum)
    }

    /// The magnitude, computed in `Float` and converted back.
    var magnitude: Float8 {
        let widenedMagnitude = float.magnitude
        return Float8(widenedMagnitude)
    }

}


// TODO: Sort members of this extension into appropriate "layers":
extension Float8 : BinaryFloatingPoint {

    /// Feeds this value into `hasher`.
    ///
    /// `+0` and `-0` compare equal (see `isEqual(to:)`), so they must also
    /// hash equally. A compiler-synthesized implementation would hash the raw
    /// `bitPattern`, which differs between the two zeros, so both zeros are
    /// hashed as `+0` here.
    func hash(into hasher: inout Hasher) {
        hasher.combine(isZero ? Self.zero.bitPattern : bitPattern)
    }

    /// The significand of the value: NaNs are returned unchanged, normal and
    /// subnormal values yield a positive value with the exponent field of
    /// `1.0`, and zeros and infinities yield `+0` and `+infinity`.
    var significand: Float8 {
        if isNaN { return self }
        if isNormal {
            return Float8(sign: .plus,
                          exponentBitPattern: Self._exponentBias,
                          significandBitPattern: significandBitPattern)
        }
        if isSubnormal {
            // Shift the leading set bit up into the implicit-bit position;
            // the initializer masks it away.
            let shift = Self.significandBitCount -
                significandBitPattern._binaryLogarithm()
            return Float8(
                sign: .plus,
                exponentBitPattern: Self._exponentBias,
                significandBitPattern: significandBitPattern &<< shift
            )
        }
        // zero or infinity.
        return Float8(
            sign: .plus,
            exponentBitPattern: exponentBitPattern,
            significandBitPattern: 0
        )
    }

    /// The unit in the last place of this value; NaN for infinities and NaNs.
    var ulp: Float8 {
        guard isFinite else { return .nan }
        if isNormal {
            // Keep only the exponent field (ie 2**exponent), then scale by
            // 2**(-significandBitCount).
            let bitPattern_ = bitPattern & Self.infinity.bitPattern
            return Float8(bitPattern: bitPattern_) * 0x1p-3
        }
        // Zeros and subnormals share the spacing of the smallest normal.
        return .leastNormalMagnitude * 0x1p-3
    }

    /// The value with the same sign and exponent as this value but with a
    /// significand of `1.0`; NaN for infinities and NaNs.
    var binade: Float8 {
        guard isFinite else { return Float8.nan }
        if isSubnormal {
            // Keep only the highest set significand bit.
            let shifts = (bitPattern & 0b0_0000_111).leadingZeroBitCount
            let signBit = bitPattern & 0b1_0000_000
            return Float8(bitPattern: signBit | (UInt8(1) &<< (7 &- shifts)))
        }
        // Clear the significand field, keep sign and exponent.
        return Float8(bitPattern:
            bitPattern & (Float8._negativeInfinity).bitPattern)
    }

    /// The number of bits needed to represent the significand, not counting
    /// the leading bit; `-1` for zeros, infinities and NaNs.
    var significandWidth: Int {
        let trailingZeroBits = significandBitPattern.trailingZeroBitCount
        if isNormal {
            guard significandBitPattern != 0 else { return 0 }
            return Self.significandBitCount &- trailingZeroBits
        }
        if isSubnormal {
            let leadingZeroBits = significandBitPattern.leadingZeroBitCount
            return Self.RawSignificand.bitWidth &-
                (trailingZeroBits &+ leadingZeroBits &+ 1)
        }
        return -1
    }

    // ------------------------------------------------------------------------
    // Everything below promotes the operands to `Float`, performs the
    // operation there, and converts the result back to `Float8`.
    // ------------------------------------------------------------------------

    mutating func round(_ rule: FloatingPointRoundingRule) {
        var f = float
        f.round(rule)
        self = Float8(f)
    }

    static func + (lhs: Float8, rhs: Float8) -> Float8 {
        return Float8(lhs.float + rhs.float)
    }

    static func - (lhs: Float8, rhs: Float8) -> Float8 {
        return Float8(lhs.float - rhs.float)
    }

    static func * (lhs: Float8, rhs: Float8) -> Float8 {
        return Float8(lhs.float * rhs.float)
    }

    static func / (lhs: Float8, rhs: Float8) -> Float8 {
        return Float8(lhs.float / rhs.float)
    }

    static func += (lhs: inout Float8, rhs: Float8) {
        lhs = lhs + rhs
    }

    static func -= (lhs: inout Float8, rhs: Float8) {
        lhs = lhs - rhs
    }

    static func *= (lhs: inout Float8, rhs: Float8) {
        lhs = lhs * rhs
    }

    static func /= (lhs: inout Float8, rhs: Float8) {
        lhs = lhs / rhs
    }

    mutating func formRemainder(dividingBy other: Float8) {
        var f = float
        f.formRemainder(dividingBy: other.float)
        self = Float8(f)
    }

    mutating func formTruncatingRemainder(dividingBy other: Float8) {
        var f = float
        f.formTruncatingRemainder(dividingBy: other.float)
        self = Float8(f)
    }

    mutating func formSquareRoot() {
        var f = float
        f.formSquareRoot()
        self = Float8(f)
    }

    mutating func addProduct(_ lhs: Float8, _ rhs: Float8) {
        var f = float
        f.addProduct(lhs.float, rhs.float)
        self = Float8(f)
    }

    func isEqual(to other: Float8) -> Bool {
        return float.isEqual(to: other.float)
    }

    func isLess(than other: Float8) -> Bool {
        return float.isLess(than: other.float)
    }

    func isLessThanOrEqualTo(_ other: Float8) -> Bool {
        return float.isLessThanOrEqualTo(other.float)
    }


}

extension Float8 : CustomStringConvertible, LosslessStringConvertible {
    /// The textual form of the equivalent `Float`.
    var description: String { "\(float)" }

    /// Parses `description` as a `Float` and converts it to `Float8`.
    ///
    /// Fails when the text is not a valid `Float`, or when the resulting
    /// value's own `description` differs from the given text.
    init?(_ description: String) {
        guard let parsed = Float(description) else { return nil }
        let candidate = Float8(parsed)
        guard candidate.description == description else { return nil }
        self = candidate
    }
}


//-----------------------------------------------------------------------------
// MARK: - Demo
//-----------------------------------------------------------------------------

extension String {
    /// Returns this string prefixed with as many copies of `char` as needed
    /// to reach `minCount` characters; strings that are already long enough
    /// are returned unchanged.
    func leftPadded(to minCount: Int, with char: Character=" ") -> String {
        let missing = minCount - count
        guard missing > 0 else { return self }
        let padding = String(repeating: char, count: missing)
        return padding + self
    }
}
extension BinaryFloatingPoint {
    /// The encoding as binary digits in the form
    /// `sign_exponent_significand`, with the exponent and significand fields
    /// zero-padded to their full bit counts.
    var segmentedBinaryString: String {
        let signDigit = sign == .plus ? "0" : "1"
        let exponentDigits = String(exponentBitPattern, radix: 2)
        let significandDigits = String(significandBitPattern, radix: 2)
        let paddedExponent =
            exponentDigits.leftPadded(to: Self.exponentBitCount, with: "0")
        let paddedSignificand =
            significandDigits.leftPadded(to: Self.significandBitCount, with: "0")
        return signDigit + "_" + paddedExponent + "_" + paddedSignificand
    }
}
extension LosslessStringConvertible {
    /// Left-pads the value's `description`; see `String.leftPadded(to:with:)`.
    func leftPadded(to minCount: Int, with char: Character=" ") -> String {
        let text = description
        return text.leftPadded(to: minCount, with: char)
    }
}


extension Float8 {


    /// Runs the demo: a series of sanity checks (each a `precondition`, so a
    /// failed check traps), followed by a printed table of all 256 bit
    /// patterns and a walk over all finite values from both ends.
    static func test() {
        // --------------------------------------------------------------------
        // See https://forums.swift.org/t/33337/19 and
        // https://forums.swift.org/t/33337/23
        // for details about this.
        precondition(Float8.significandBitCount >= 2) // at least 3 bits, one
        // of which may be implicit.
        precondition(Float8.exponentBitCount >= 2)
        // IEEE-754 imposes the following constraints on the exponent field:
        let emin = Float8.leastNormalMagnitude.exponent
        let emax = Float8.greatestFiniteMagnitude.exponent
        precondition(emin <= emax)
        precondition(emax >= 2)
        precondition(emin == 1 - emax)
        precondition(emax == (1 << (Float8.exponentBitCount - 1)) - 1)
        // --------------------------------------------------------------------
        // Some other checks:
        // If x is -leastNonzeroMagnitude, then x.nextUp is -0.0.
        // (And the value below leastNonzeroMagnitude is +0.0.)
        do {
            var x = Float8.leastNonzeroMagnitude.nextDown
            precondition(x.isZero && x.sign == .plus)
            x = (-Float8.leastNonzeroMagnitude).nextUp
            precondition(x.isZero && x.sign == .minus)
        }
        // significandWidth of the three smallest positive values.
        precondition(Float8.leastNonzeroMagnitude.significandWidth == 0)
        precondition(Float8.leastNonzeroMagnitude.nextUp.significandWidth == 0)
        precondition(Float8.leastNonzeroMagnitude.nextUp.nextUp.significandWidth == 1)
        // ulp and binade around zero and the smallest subnormals.
        precondition(Float8(-Float(0)).ulp > 0)
        precondition((0 as Float8).binade == -Float8(0) + -0.0)
        precondition(Double(Float8.zero.ulp) ==
            Double(sign: .plus,
                   exponent: 1 - Int(Float8._exponentBias),
                   significand: 1.0 / Double(1 << Float8.significandBitCount)))
        precondition(Float8.leastNonzeroMagnitude.binade * -1 == -Float8.zero.ulp)
        precondition(-Float8.leastNonzeroMagnitude.nextUp.binade == -(Float8.zero.ulp * 2 + -0.0))
        // Rounding at the top of the range: adding half an ulp to the
        // greatest finite magnitude gives infinity, adding anything less
        // leaves it unchanged.
        do {
            let a = Float8.greatestFiniteMagnitude
            let b = a.ulp / 2
            precondition(a + b == .infinity)
            precondition(a + b.nextDown == a)
            precondition(-a - b == -.infinity)
            precondition(-a - b.nextDown == -a)
        }
        // Print all values:
        var finCount = 0
        var infCount = 0
        var nanCount = 0
        print("   N       Float8   bitPattern  exponent  significand       binade          ulp")
        print("-------------------------------------------------------------------------------")
        for byteValue: UInt8 in .min ... .max {
            let v = Float8(bitPattern: byteValue)
            // `exponent` uses Int.min / Int.max as markers for zeros and for
            // infinities/NaNs; print those symbolically.
            let expStr: String
            switch v.exponent {
            case .min: expStr = "Int.min"
            case .max: expStr = "Int.max"
            default: expStr = v.exponent.description
            }
            print(
                byteValue.leftPadded(to: 4),
                v.leftPadded(to: 12),
                v.segmentedBinaryString.leftPadded(to: 12),
                expStr.leftPadded(to: 9),
                v.significand.leftPadded(to: 12),
                v.binade.leftPadded(to: 12),
                v.ulp.leftPadded(to: 12),
                v.isSubnormal ? "subnormal" : v.isNormal ? "normal" : "n/a"
            )

            if v.isFinite { finCount += 1 }
            if v.isNaN { nanCount += 1 }
            if v.isInfinite { infCount += 1 }
        }
        print("Number of finite values:", finCount)
        print("Number of infinite values:", infCount)
        print("Number of NaNs:", nanCount)
        // Every bit pattern must fall into exactly one of the three classes.
        precondition(finCount + infCount + nanCount == 256)
        print("--")

        // Walk w.0 upwards from -240 and w.1 downwards from 240 in lockstep,
        // stopping just before either would step to an infinity.
        var w: (Float8, Float8) = (-240, 240)
        while true {
            print(w.0.leftPadded(to: 12),
                  w.0.segmentedBinaryString.leftPadded(to: 12),
                  w.1.segmentedBinaryString.leftPadded(to: 12),
                  w.1.leftPadded(to: 12)
            )
            precondition(w.0 + w.1 == -0.0) // use -0.0 just to check
            precondition((w.0 - w.1).sign == w.0.sign)
            precondition(w.0.nextUp.nextDown == w.0)
            if w.0.nextUp.isInfinite || w.1.nextDown.isInfinite { break }
            w = (w.0.nextUp, w.1.nextDown)
        }
        // The two walkers must have swapped ends.
        precondition(w == (240, -240))
        print("--")
    }

}


// Program entry point: run the checks and print the tables.
Float8.test()