Skip to content

Instantly share code, notes, and snippets.

@jens-bc

jens-bc/float8.swift

Last active Mar 24, 2020
Embed
What would you like to do?
A Float8 type
// ============================================================================
// This is an attempt at implementing a Float8 type by Jens Persson.
// It seems to work.
// This file is in the form of a command line program which will
// do some basic checks and print all Float8 values.
// ---------------------------------------------------------------------------
// Use it in any way you like, please let me know of any issues or
// improvements here: https://forums.swift.org/t/33337/38
// ===========================================================================
// ----------------------------------------------------------------------------
// About Avoiding Accidental Infinite Recursion.
// ----------------------------------------------------------------------------
// When implementing something like `Float8`, it's easy to cause unintentional
// infinite recursion, especially in the presence of default implementations,
// and when "cheating" by eg converting to `Float`, doing some work, and
// then converting the result back to `Float8`, as discussed here:
// https://forums.swift.org/t/33337/8
// https://forums.swift.org/t/33337/9
//
// So we'll implement eg `Float8.init(_ value: Float)` ourselves, rather than
// using the default implementation, to avoid the risk of infinite recursion
// now or in the future. And our implementation must not call any member of
// `Float8` that might result in a call back to it. Members that have to be
// avoided (depending on how we implement them) might include operators and
// literal initializers, which might be tricky to spot or remember:
// `let a: Float8 = -0.0` and `someFloat8 = 0` and `someFloat8 = -someOtherF8`.
//
// ----------------------------------------------------------------------------
// But anyway, to summarize, and to keep it relatively simple and manageable:
// ----------------------------------------------------------------------------
//
// * All members (of `Float8`) are separated into "layers"(/extensions).
//
// * A member defined in layer N is only allowed to call members defined in
// layer N-1, ie: A member of layer N must not call any member in layer >= N.
//
// * Checking for disallowed calls can be done manually by commenting out all
// but the checked member within the same layer, and commenting out all
// higher layers. Or I guess it could be automated in some way.
//
// * But note that these precautions won't help with the problem of default
// implementations. We must identify and implement all of these ourselves.
//
// ----------------------------------------------------------------------------
import Darwin
// ----------------------------------------------------------------------------
// MARK: - Member Layer 0
// ----------------------------------------------------------------------------
/// An 8-bit binary floating point type stored in a single byte (which might
/// not work as expected, though no issues have been found so far).
///
/// This type has been put together by an amateur looking at this:
/// * https://en.wikipedia.org/wiki/Single-precision_floating-point_format
/// * http://www.cs.jhu.edu/~jorgev/cs333/readings/8-Bit_Floating_Point.pdf
/// * https://raw.githubusercontent.com/apple/swift/master/stdlib/public/core/FloatingPointTypes.swift.gyb
/// and by piggybacking on `Float32` as much as possible while trying to avoid
/// the risk of infinite recursion.
///
/// Layout, most significant bit first: 1 sign bit, 4 exponent bits and
/// 3 significand bits.
///
/// ```
/// Exponent bias 7
/// Exponent bit pattern: 0 1 2 3 4 5 6 7 8 9 A B C D E F
/// Exponent: sub -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 inf/nan
///
/// 0_0000_001 = 0x01 = 2**(-6) * (0 + 1/8) = 0.001953125 (least nonzero magnitude)
/// 0_0000_111 = 0x07 = 2**(-6) * (0 + 7/8) = 0.013671875 (greatest subnormal magnitude)
/// 0_0001_000 = 0x08 = 2**(-6) * (1 + 0/8) = 0.015625 (least normal nonzero magnitude)
/// 0_0111_000 = 0x38 = 2**( 0) * (1 + 0/8) = 1.0
/// 0_1110_111 = 0x77 = 2**( 7) * (1 + 7/8) = 240.0 (greatest finite magnitude)
/// ```
struct Float8 {
    /// The raw encoding of this value. Readable from anywhere in the module,
    /// writable only from within this file.
    private(set) var bitPattern: UInt8

    /// Creates a value directly from its raw encoding. The byte is stored
    /// as given; no validation or normalization is performed.
    init(bitPattern rawBits: UInt8) {
        bitPattern = rawBits
    }
}
// ----------------------------------------------------------------------------
// MARK: - Member Layer 1
// ----------------------------------------------------------------------------
extension Float8 {
    typealias Exponent = Int
    typealias RawSignificand = UInt8
    typealias RawExponent = UInt
    typealias Stride = Float8
    typealias Magnitude = Float8
    typealias FloatLiteralType = Float32
    typealias IntegerLiteralType = Int64

    /// Number of bits in the exponent field.
    static var exponentBitCount: Int { 4 }
    /// Number of explicitly stored significand bits.
    static var significandBitCount: Int { 3 }
    /// The exponent bias: (1 << (exponentBitCount - 1)) - 1
    static var _exponentBias: UInt { 7 }

    /// A quiet NaN (bit pattern `0b0_1111_110`).
    static var nan: Float8 { Float8(bitPattern: 0b0_1111_110) }
    /// A signaling NaN (bit pattern `0b0_1111_010`).
    static var signalingNaN: Float8 { Float8(bitPattern: 0b0_1111_010) }
    /// Positive infinity.
    static var infinity: Float8 { Float8(bitPattern: 0b0_1111_000) }
    /// Negative infinity.
    static var _negativeInfinity: Float8 { Float8(bitPattern: 0b1_1111_000) }

    // These two were mutable stored statics; they are constants of the format
    // and are now read-only computed properties like their siblings, so they
    // cannot be reassigned by accident.
    /// The all-ones exponent bit pattern shared by infinities and NaNs.
    static var _infinityExponent: UInt { 0b1111 }
    /// Mask selecting the significand bits of a raw bit pattern.
    static var _significandMask: UInt8 { 0b111 }

    /// Positive zero.
    static var zero: Float8 { Float8(bitPattern: 0) }
    /// Negative zero (only the sign bit set).
    static var _negativeZero: Float8 { Float8(bitPattern: 0b1_0000_000) }
    /// 1.0
    static var one: Float8 { Float8(bitPattern: 0b0_0111_000) }

    /// 0.015625
    static var leastNormalMagnitude: Float8 {
        Float8(bitPattern: 0b0_0001_000)
    }

    /// 0.001953125
    static var leastNonzeroMagnitude: Float8 {
        Float8(bitPattern: 0b0_0000_001)
    }

    /// 240.0
    static var greatestFiniteMagnitude: Float8 {
        Float8(bitPattern: 0b0_1110_111)
    }

    /// The mathematical constant pi approximated by the closest representable
    /// `Float8` value less than pi, which is `3.0`.
    static var pi: Float8 { Float8(bitPattern: 0b0_1000_100) }

    /// The four exponent bits (bits 3...6 of `bitPattern`), right-aligned.
    var exponentBitPattern: UInt { UInt((bitPattern &>> 3) & 0b1111) }
    /// The three significand bits (bits 0...2 of `bitPattern`).
    var significandBitPattern: UInt8 { bitPattern & 0b111 }
    /// `.minus` when the top bit of `bitPattern` is set, otherwise `.plus`.
    var sign: FloatingPointSign { bitPattern & 128 == 0 ? .plus : .minus }
    /// Always `true`.
    var isCanonical: Bool { true }

    /// `true` for both `+0` and `-0` (all bits other than the sign are clear).
    var isZero: Bool {
        bitPattern & 0b0_1111_111 == 0
    }

    /// Negation: flips the sign bit and leaves every other bit untouched.
    static prefix func - (operand: Float8) -> Float8 {
        // I have verified that the corresponding implementation is valid for
        // all bit patterns of `Float32`.
        Float8(bitPattern: operand.bitPattern ^ 0b1_0000_000)
    }
}
// ----------------------------------------------------------------------------
// MARK: - Member Layer 3
// ----------------------------------------------------------------------------
extension Float8 {
    /// Mask with only the most significant significand bit set; that bit
    /// is what `isSignalingNaN` tests to tell quiet from signaling NaNs.
    private static var _quietNaNMask: UInt8 {
        let topSignificandBit = UInt8(significandBitCount - 1)
        return 1 &<< topSignificandBit
    }

    /// `true` unless the exponent field is all ones (infinity or NaN).
    var isFinite: Bool {
        let allOnesExponent: UInt = (1 << Float8.exponentBitCount) &- 1
        return exponentBitPattern < allOnesExponent
    }
}
// ----------------------------------------------------------------------------
// MARK: - Member Layer 4
// ----------------------------------------------------------------------------
extension Float8 {
    /// `true` for finite values with a nonzero exponent field.
    var isNormal: Bool {
        isFinite && exponentBitPattern > 0
    }

    /// `true` when the exponent field is zero but the significand is not.
    var isSubnormal: Bool {
        significandBitPattern != 0 && exponentBitPattern == 0
    }

    /// `true` for a non-finite value whose significand field is zero.
    var isInfinite: Bool {
        !(isFinite || significandBitPattern != 0)
    }

    /// `true` for a non-finite value whose significand field is nonzero.
    var isNaN: Bool {
        guard !isFinite else { return false }
        return significandBitPattern != 0
    }
}
// ----------------------------------------------------------------------------
// MARK: - Member Layer 5
// ----------------------------------------------------------------------------
extension Float8 {
    /// `true` for a NaN whose quiet bit (see `_quietNaNMask`) is clear.
    var isSignalingNaN: Bool {
        guard isNaN else { return false }
        return (significandBitPattern & Self._quietNaNMask) == 0
    }

    /// The unbiased exponent of this value.
    ///
    /// Returns `Int.max` for infinities and NaNs and `Int.min` for zeros.
    /// For subnormals the result is lowered according to the position of
    /// the highest set significand bit.
    var exponent: Int {
        guard isFinite else { return .max }
        guard !isZero else { return .min }
        let unbiased = Int(exponentBitPattern) - Int(Self._exponentBias)
        guard !isNormal else { return unbiased }
        // Subnormal: count how far the leading set bit sits below the
        // position of the (absent) implicit bit.
        let leadingBitIndex = significandBitPattern._binaryLogarithm()
        let distanceBelowImplicitBit = Self.significandBitCount - leadingBitIndex
        return unbiased + 1 - distanceBelowImplicitBit
    }
}
// ----------------------------------------------------------------------------
// MARK: - Member Layer 6
// ----------------------------------------------------------------------------
extension Float8 {
    /// Assembles a value from its three raw fields.
    ///
    /// - Parameters:
    ///   - sign: The sign of the result.
    ///   - exponentBitPattern: The biased exponent; only its low 4 bits are used.
    ///   - significandBitPattern: The significand; only its low 3 bits are used.
    init(sign: FloatingPointSign,
         exponentBitPattern: UInt,
         significandBitPattern: UInt8)
    {
        let signBits = sign == .minus
            ? Float8._negativeZero.bitPattern
            : Float8.zero.bitPattern
        let exponentBits = UInt8(truncatingIfNeeded:
            (exponentBitPattern & 0b1111)) &<< Self.significandBitCount
        let significandBits = significandBitPattern & Self._significandMask
        self.init(bitPattern: signBits | exponentBits | significandBits)
    }

    /// The least representable value that compares greater than this value.
    ///
    /// Signaling NaNs are returned quieted, quiet NaNs and `+infinity` are
    /// returned unchanged, and both zeros step up to `leastNonzeroMagnitude`.
    var nextUp: Float8 {
        // I've verified that this implementation works the same as the
        // one in the standard library for `Float`.
        // ------------------------------------------------------------
        // Silence signaling NaNs, map -0 to +0:
        // (Can't use `let x = v + 0` here)
        var x = self
        if x.isSignalingNaN {
            x = Float8(bitPattern: x.bitPattern | Float8.nan.bitPattern)
        } else if x.isZero {
            x = .zero
        }
        // Equivalent to `x < .infinity`, but expressed with lower-layer
        // members only. The `<` operator would dispatch through
        // `isLess(than:)` to `float`, which lives in a higher layer and so
        // breaks the layering rule described at the top of this file.
        let isBelowInfinity =
            !x.isNaN && x.bitPattern != Float8.infinity.bitPattern
        if isBelowInfinity {
            // +1 for non-negative encodings, -1 for negative ones (the
            // arithmetic shift smears the sign bit across the byte).
            let increment = Int8(bitPattern: x.bitPattern) &>> 7 | 1
            let bitPattern_ = x.bitPattern &+ UInt8(bitPattern: increment)
            return Float8(bitPattern: bitPattern_)
        }
        return x
    }

    /// The greatest representable value that compares less than this value,
    /// computed by mirroring `nextUp` around zero.
    var nextDown: Float8 {
        return -(-self).nextUp
    }
}
// ----------------------------------------------------------------------------
// MARK: - Member Layer 7
// ----------------------------------------------------------------------------
extension Float8 {
/// Converts an integer to a `Float8`, reporting whether the conversion
/// was exact.
///
/// Zero converts exactly to `+0`. When the binary logarithm of the
/// magnitude exceeds `greatestFiniteMagnitude.exponent` (also after a
/// rounding carry), the result is the infinity of matching sign with
/// `exact == false`. Otherwise bits that do not fit are rounded to nearest,
/// with ties going to the even significand, and `exact` is `true` only if
/// no nonzero bits were discarded.
///
/// - Parameter source: The integer to convert.
/// - Returns: The converted value and a flag telling whether it equals
///   `source` exactly.
private static func _convert<Source: BinaryInteger>(from source: Source)
-> (value: Self, exact: Bool)
{
// --------------------------------------------------------------------
// Copied with modifications from stdlib impl in FloatingPoint.swift
// --------------------------------------------------------------------
// Zero is really extra simple, and saves us from trying to normalize
// a value that cannot be normalized.
if _fastPath(source == 0) { return (Self.zero, true) }
// We now have a non-zero value; convert it to a strictly positive
// value by taking the magnitude.
let magnitude = source.magnitude
var exponent = magnitude._binaryLogarithm()
// If the exponent would be larger than the largest representable
// exponent, the result is just an infinity of the appropriate sign.
guard exponent <= Self.greatestFiniteMagnitude.exponent else {
return (
Source.isSigned && source < 0 ? ._negativeInfinity : .infinity,
false
)
}
// If exponent <= significandBitCount, we don't need to round it to
// construct the significand; we just need to left-shift it into place;
// the result is always exact as we've accounted for exponent-too-large
// already and no rounding can occur.
if exponent <= Self.significandBitCount {
let shift = Self.significandBitCount &- exponent
// The leading 1 ends up just above the significand field and is masked
// away by `init(sign:exponentBitPattern:significandBitPattern:)`.
let significand = RawSignificand(magnitude) &<< shift
let value = Self(
sign: Source.isSigned && source < 0 ? .minus : .plus,
exponentBitPattern: Self._exponentBias + RawExponent(exponent),
significandBitPattern: significand
)
return (value, true)
}
// exponent > significandBitCount, so we need to do a rounding right
// shift, and adjust exponent if needed
let shift = exponent &- Self.significandBitCount
// `halfway` is the value of the highest discarded bit; `fraction` holds
// all discarded bits.
let halfway = (1 as Source.Magnitude) << (shift - 1)
let mask = 2 * halfway - 1
let fraction = magnitude & mask
var significand =
RawSignificand(truncatingIfNeeded: magnitude >> shift)
& Self._significandMask
// Round up when above the halfway point, or exactly halfway with an odd
// significand (ties to even).
if fraction > halfway || (fraction == halfway && significand & 1 == 1) {
var carry = false
(significand, carry) = significand.addingReportingOverflow(1)
// The increment overflowed the significand field: bump the exponent.
if carry || significand > Self._significandMask {
exponent += 1
guard exponent <= Self.greatestFiniteMagnitude.exponent else {
return (Source.isSigned && source < 0
? ._negativeInfinity
: .infinity, false)
}
}
}
return (Self(
sign: Source.isSigned && source < 0 ? .minus : .plus,
exponentBitPattern: Self._exponentBias + RawExponent(exponent),
significandBitPattern: significand
), fraction == 0)
}
/// Converts a value of another binary floating-point type to `Float8`,
/// reporting whether the conversion was exact.
///
/// Zeros and infinities convert exactly, keeping their sign. NaNs convert
/// to a NaN of the same sign and signaling state, carrying over the payload
/// bits that fit, and are never reported as exact. Finite values whose
/// exponent is out of range become a signed zero, a signed
/// `leastNonzeroMagnitude` or a signed infinity (all inexact); the remaining
/// values are truncated and then rounded to the closest representable
/// value.
///
/// - Precondition: `Source` must not be `Float8` itself.
/// - Parameter source: The floating-point value to convert.
/// - Returns: The converted value and a flag telling whether it represents
///   `source` exactly.
static func _convert<Source: BinaryFloatingPoint>(from source: Source)
-> (value: Self, exact: Bool)
{
// --------------------------------------------------------------------
// Copied with modifications from stdlib impl in FloatingPoint.swift
// --------------------------------------------------------------------
// NOTE: There is/was a bug in the stdlib implementation:
// https://forums.swift.org/t/33337/31
// The following code has the fix:
precondition(Source.self != Self.self)
guard _fastPath(!source.isZero) else {
return (source.sign == .minus
? ._negativeZero
: .zero, true)
}
guard _fastPath(source.isFinite) else {
if source.isInfinite {
return (source.sign == .minus
? ._negativeInfinity
: .infinity, true)
}
// IEEE 754 requires that any NaN payload be propagated,
// if possible.
let payload_ =
source.significandBitPattern &
~(Source.nan.significandBitPattern |
Source.signalingNaN.significandBitPattern)
let mask =
Self.greatestFiniteMagnitude.significandBitPattern &
~(Self.nan.significandBitPattern |
Self.signalingNaN.significandBitPattern)
let payload = RawSignificand(truncatingIfNeeded: payload_) & mask
// Although
// .signalingNaN.exponentBitPattern == .nan.exponentBitPattern,
// we do not *need* to rely on this relation, and therefore we
// do not.
let value = source.isSignalingNaN
? Self(
sign: source.sign,
exponentBitPattern: Self.signalingNaN.exponentBitPattern,
significandBitPattern: payload |
Self.signalingNaN.significandBitPattern)
: Self(
sign: source.sign,
exponentBitPattern: Self.nan.exponentBitPattern,
significandBitPattern: payload |
Self.nan.significandBitPattern)
// We define exactness by equality after roundtripping; since NaN
// is never equal to itself, it can never be converted exactly.
return (value, false)
}
let exponent = source.exponent
var exemplar = Self.leastNormalMagnitude
let exponentBitPattern: Self.RawExponent
let leadingBitIndex: Int
let shift: Int
let significandBitPattern: Self.RawSignificand
if exponent < exemplar.exponent {
// The floating-point result is either zero or subnormal.
exemplar = Self.leastNonzeroMagnitude
let minExponent = exemplar.exponent
if exponent + 1 < minExponent {
return (source.sign == .minus ? ._negativeZero : .zero, false)
}
if _slowPath(exponent + 1 == minExponent) {
// Although the most significant bit (MSB) of a subnormal
// source significand is explicit, Swift BinaryFloatingPoint
// APIs actually omit any explicit MSB from the count
// represented in significandWidth. For instance:
//
// Double.leastNonzeroMagnitude.significandWidth == 0
//
// Therefore, we do not need to adjust our work here for a
// subnormal source.
return source.significandWidth == 0
? (source.sign == .minus ? ._negativeZero : .zero, false)
: (source.sign == .minus
? Self(bitPattern: exemplar.bitPattern | 0b10000000)
: exemplar, false)
}
exponentBitPattern = 0 as Self.RawExponent
leadingBitIndex = Int(Self.Exponent(exponent) - minExponent)
shift =
leadingBitIndex &-
(source.significandWidth &+
source.significandBitPattern.trailingZeroBitCount)
let leadingBit = source.isNormal
? (1 as Self.RawSignificand) << leadingBitIndex
: 0
significandBitPattern = leadingBit | (shift >= 0
? Self.RawSignificand(source.significandBitPattern) << shift
: Self.RawSignificand(source.significandBitPattern >> -shift))
} else {
// The floating-point result is either normal or infinite.
exemplar = Self.greatestFiniteMagnitude
if exponent > exemplar.exponent {
return (source.sign == .minus ? ._negativeInfinity : .infinity,
false)
}
exponentBitPattern = exponent < 0
? (1 as Self).exponentBitPattern - Self.RawExponent(-exponent)
: (1 as Self).exponentBitPattern + Self.RawExponent(exponent)
leadingBitIndex = exemplar.significandWidth
shift =
leadingBitIndex &-
(source.significandWidth &+
source.significandBitPattern.trailingZeroBitCount)
let sourceLeadingBit = source.isSubnormal
? (1 as Source.RawSignificand) <<
(source.significandWidth &+
source.significandBitPattern.trailingZeroBitCount)
: 0
significandBitPattern = shift >= 0
? Self.RawSignificand(
sourceLeadingBit ^ source.significandBitPattern) << shift
: Self.RawSignificand(
(sourceLeadingBit ^ source.significandBitPattern) >> -shift)
}
// `value` is the truncated result; it is exact when every significant
// bit of the source fits below the leading bit.
let value = Self(
sign: source.sign,
exponentBitPattern: exponentBitPattern,
significandBitPattern: significandBitPattern)
if source.significandWidth <= leadingBitIndex {
return (value, true)
}
// We promise to round to the closest representation, and if two
// representable values are equally close, the value with more trailing
// zeros in its significand bit pattern. Therefore, we must take a look
// at the bits that we've just truncated.
let ulp = (1 as Source.RawSignificand) << -shift
let truncatedBits = source.significandBitPattern & (ulp - 1)
if truncatedBits < ulp / 2 {
return (value, false)
}
// Rounding away from zero means stepping down for negative values and up
// for positive ones.
let rounded = source.sign == .minus ? value.nextDown : value.nextUp
guard _fastPath(
truncatedBits != ulp / 2 ||
significandBitPattern.trailingZeroBitCount <
rounded.significandBitPattern.trailingZeroBitCount)
else { return (value, false) }
return (rounded, false)
}
}
// ----------------------------------------------------------------------------
// MARK: - Member Layer 8
// ----------------------------------------------------------------------------
extension Float8 {
    /// Creates the `Float8` that `_convert(from:)` yields for the given
    /// integer, whether or not the conversion is exact.
    init<Source: BinaryInteger>(_ value: Source) {
        let conversion = Self._convert(from: value)
        self = conversion.value
    }

    /// Creates a `Float8` from the given integer, or returns `nil` when
    /// `_convert(from:)` reports that the conversion is not exact.
    init?<Source: BinaryInteger>(exactly value: Source) {
        let conversion = Self._convert(from: value)
        if !conversion.exact { return nil }
        self = conversion.value
    }

    /// Creates a `Float8` from an integer literal.
    init(integerLiteral value: Int64) {
        let conversion = Self._convert(from: value)
        self = conversion.value
    }

    /// Creates the `Float8` that `_convert(from:)` yields for the given
    /// floating-point value, whether or not the conversion is exact.
    init<Source: BinaryFloatingPoint>(_ value: Source) {
        let conversion = Self._convert(from: value)
        self = conversion.value
    }

    /// Creates a `Float8` from the given floating-point value, or returns
    /// `nil` when `_convert(from:)` reports that the conversion is not exact.
    init?<Source: BinaryFloatingPoint>(exactly value: Source) {
        let conversion = Self._convert(from: value)
        if !conversion.exact { return nil }
        self = conversion.value
    }
}
// ----------------------------------------------------------------------------
// MARK: - Member Layer 9
// ----------------------------------------------------------------------------
extension Float8 {
    /// Creates a value from a sign, an exponent and a significand by building
    /// the corresponding `Float` first and converting that to `Float8`.
    init(sign: FloatingPointSign, exponent: Int, significand: Float8) {
        let wide = Float(sign: sign,
                         exponent: exponent,
                         significand: significand.float)
        self.init(wide)
    }

    /// Creates a `Float8` from a floating-point literal.
    init(floatLiteral value: Float) {
        // There was an infinite recursion here for eg `Float8(-Float(0))`,
        // but not for `Float8(-Float(1))` or `Float8(Float(0))`.
        // A special case for negative zero took care of that particular
        // case, but are there more?
        // if value == -Float(0) { self.init(bitPattern: 0b1_0000_000) }
        // else { self.init(value) }
        self.init(value)
    }

    /// This value widened to `Float`.
    ///
    /// This is used a lot by the arithmetic members, which promote to
    /// `Float`, do the work there and convert the result back to `Float8`.
    /// `Float.init(self)` is deliberately not used, since there is no control
    /// over which members of `Float8` it calls (now or in the future); the
    /// widening is therefore implemented by hand.
    var float: Float {
        guard isFinite else {
            if isInfinite {
                return sign == .minus ? -Float.infinity : Float.infinity
            }
            // NaN: keep the payload bits that are not reserved by the
            // quiet/signaling encodings, plus the signaling state and sign.
            let reservedNarrowBits = Float8.nan.significandBitPattern |
                Float8.signalingNaN.significandBitPattern
            let narrowPayload = significandBitPattern & ~reservedNarrowBits
            let reservedWideBits = Float.nan.significandBitPattern |
                Float.signalingNaN.significandBitPattern
            let widePayloadMask =
                Float.greatestFiniteMagnitude.significandBitPattern &
                ~reservedWideBits
            var bits = isSignalingNaN
                ? Float.signalingNaN.bitPattern
                : Float.nan.bitPattern
            bits |= UInt32(narrowPayload) & widePayloadMask
            if sign == .minus { bits |= UInt32(1) &<< UInt32(31) }
            return Float(bitPattern: bits)
        }
        // Finite: sign * 2^exponent * (implicit bit + fraction).
        let subnormal = isSubnormal
        // Zeros and subnormals have no implicit leading one.
        let implicitBit: Float = (isZero || subnormal) ? 0.0 : 1.0
        var power = Float(exponentBitPattern) - Float(Self._exponentBias)
        if subnormal { power += 1 }
        let fraction: Float = Float(significandBitPattern) / 8.0
        let signFactor = sign == .minus ? -Float(1) : Float(1)
        return signFactor * powf(Float(2), power) * (implicitBit + fraction)
    }

    /// The distance from this value to `other`, computed in `Float`.
    func distance(to other: Float8) -> Float8 {
        let delta = other.float - float
        return Float8(delta)
    }

    /// This value offset by `n`, computed in `Float`.
    func advanced(by n: Float8) -> Float8 {
        let sum = float + n.float
        return Float8(sum)
    }

    /// The magnitude of this value, computed in `Float`.
    var magnitude: Float8 {
        let wideMagnitude = float.magnitude
        return Float8(wideMagnitude)
    }
}
// TODO: Sort members of this extension into appropriate "layers":
extension Float8 : BinaryFloatingPoint {
    /// Hashes this value consistently with `==`.
    ///
    /// Equality is IEEE equality (`isEqual(to:)`), under which `-0 == +0`.
    /// Without an explicit implementation, hashing would be derived from the
    /// raw `bitPattern`, so two equal zeros would hash differently and break
    /// the `Hashable` contract. Both zeros are therefore hashed as `+0`.
    func hash(into hasher: inout Hasher) {
        hasher.combine(isZero ? UInt8(0) : bitPattern)
    }

    /// The significand of this value: the value with its sign made positive
    /// and its exponent set to zero (bias). NaNs are returned unchanged;
    /// zeros and infinities are returned with a positive sign.
    var significand: Float8 {
        if isNaN { return self }
        if isNormal {
            return Float8(sign: .plus,
                          exponentBitPattern: Self._exponentBias,
                          significandBitPattern: significandBitPattern)
        }
        if isSubnormal {
            // Shift the leading set bit up into the implicit-bit position;
            // the initializer masks it away.
            let shift = Self.significandBitCount -
                significandBitPattern._binaryLogarithm()
            return Float8(
                sign: .plus,
                exponentBitPattern: Self._exponentBias,
                significandBitPattern: significandBitPattern &<< shift
            )
        }
        // zero or infinity.
        return Float8(
            sign: .plus,
            exponentBitPattern: exponentBitPattern,
            significandBitPattern: 0
        )
    }

    /// The unit in the last place of this value; `.nan` for non-finite values.
    var ulp: Float8 {
        guard isFinite else { return .nan }
        if isNormal {
            // Keep only the exponent bits, then scale down by the number of
            // significand bits.
            let bitPattern_ = bitPattern & Self.infinity.bitPattern
            return Float8(bitPattern: bitPattern_) * 0x1p-3
        }
        return .leastNormalMagnitude * 0x1p-3
    }

    /// The value with the same sign and exponent as this value but with a
    /// significand of 1.0; `.nan` for non-finite values.
    var binade: Float8 {
        guard isFinite else { return Float8.nan }
        if isSubnormal {
            // Keep only the highest set significand bit.
            let shifts = (bitPattern & 0b0_0000_111).leadingZeroBitCount
            let signBit = bitPattern & 0b1_0000_000
            return Float8(bitPattern: signBit | (UInt8(1) &<< (7 &- shifts)))
        }
        return Float8(bitPattern:
            bitPattern & (Float8._negativeInfinity).bitPattern)
    }

    /// The number of bits required to represent the significand, not
    /// counting the leading bit; `-1` for zeros, infinities and NaNs.
    var significandWidth: Int {
        let trailingZeroBits = significandBitPattern.trailingZeroBitCount
        if isNormal {
            guard significandBitPattern != 0 else { return 0 }
            return Self.significandBitCount &- trailingZeroBits
        }
        if isSubnormal {
            let leadingZeroBits = significandBitPattern.leadingZeroBitCount
            return Self.RawSignificand.bitWidth &-
                (trailingZeroBits &+ leadingZeroBits &+ 1)
        }
        return -1
    }

    // All members below promote to `Float`, do the work there and convert
    // the result back to `Float8`.

    /// Rounds this value in place using the given rule.
    mutating func round(_ rule: FloatingPointRoundingRule) {
        var f = self.float
        f.round(rule)
        self = Float8(f)
    }

    static func - (lhs: Float8, rhs: Float8) -> Float8 {
        return Float8(lhs.float - rhs.float)
    }

    static func * (lhs: Float8, rhs: Float8) -> Float8 {
        return Float8(lhs.float * rhs.float)
    }

    static func *= (lhs: inout Float8, rhs: Float8) {
        var f = lhs.float
        f *= rhs.float
        lhs = Float8(f)
    }

    static func / (lhs: Float8, rhs: Float8) -> Float8 {
        return Float8(lhs.float / rhs.float)
    }

    static func /= (lhs: inout Float8, rhs: Float8) {
        var f = lhs.float
        f /= rhs.float
        lhs = Float8(f)
    }

    static func += (lhs: inout Float8, rhs: Float8) {
        var f = lhs.float
        f += rhs.float
        lhs = Float8(f)
    }

    static func + (lhs: Float8, rhs: Float8) -> Float8 {
        let r = lhs.float + rhs.float
        return Float8.init(r)
    }

    static func -= (lhs: inout Float8, rhs: Float8) {
        var f = lhs.float
        f -= rhs.float
        lhs = Float8(f)
    }

    /// Replaces this value with the remainder of dividing it by `other`.
    mutating func formRemainder(dividingBy other: Float8) {
        var f = self.float
        f.formRemainder(dividingBy: other.float)
        self = Float8(f)
    }

    /// Replaces this value with the truncating remainder of dividing it by
    /// `other`.
    mutating func formTruncatingRemainder(dividingBy other: Float8) {
        var f = self.float
        f.formTruncatingRemainder(dividingBy: other.float)
        self = Float8(f)
    }

    /// Replaces this value with its square root.
    mutating func formSquareRoot() {
        var f = self.float
        f.formSquareRoot()
        self = Float8(f)
    }

    /// Adds the product of `lhs` and `rhs` to this value in place.
    mutating func addProduct(_ lhs: Float8, _ rhs: Float8) {
        var f = self.float
        f.addProduct(lhs.float, rhs.float)
        self = Float8(f)
    }

    /// IEEE equality, evaluated on the `Float` representations.
    func isEqual(to other: Float8) -> Bool {
        return self.float.isEqual(to: other.float)
    }

    /// IEEE less-than, evaluated on the `Float` representations.
    func isLess(than other: Float8) -> Bool {
        return self.float.isLess(than: other.float)
    }

    /// IEEE less-than-or-equal, evaluated on the `Float` representations.
    func isLessThanOrEqualTo(_ other: Float8) -> Bool {
        return self.float.isLessThanOrEqualTo(other.float)
    }
}
extension Float8 : CustomStringConvertible, LosslessStringConvertible {
    /// The textual form of this value, which is that of its `Float`
    /// representation.
    var description: String { float.description }

    /// Creates a value from text.
    ///
    /// Returns `nil` when `description` cannot be parsed as a `Float`, or
    /// when the converted `Float8` does not print back as exactly the same
    /// text.
    init?(_ description: String) {
        guard let parsed = Float(description) else { return nil }
        let candidate = Float8(parsed)
        guard candidate.description == description else { return nil }
        self = candidate
    }
}
//-----------------------------------------------------------------------------
// MARK: - Demo
//-----------------------------------------------------------------------------
extension String {
    /// Returns this string padded on the left with `char` until it is at
    /// least `minCount` characters long. Strings that are already long
    /// enough are returned unchanged.
    func leftPadded(to minCount: Int, with char: Character=" ") -> String {
        let missing = minCount - count
        guard missing > 0 else { return self }
        let padding = String(repeating: char, count: missing)
        return padding + self
    }
}
extension BinaryFloatingPoint {
    /// The sign, exponent and significand bit patterns in binary, separated
    /// by underscores. The exponent and significand fields are zero-padded
    /// to `exponentBitCount` and `significandBitCount` digits.
    var segmentedBinaryString: String {
        let signDigit = sign == .plus ? "0" : "1"
        let exponentDigits = String(exponentBitPattern, radix: 2)
            .leftPadded(to: Self.exponentBitCount, with: "0")
        let significandDigits = String(significandBitPattern, radix: 2)
            .leftPadded(to: Self.significandBitCount, with: "0")
        return "\(signDigit)_\(exponentDigits)_\(significandDigits)"
    }
}
extension LosslessStringConvertible {
    /// Returns the `description` of this value, left-padded with `char` to
    /// at least `minCount` characters.
    func leftPadded(to minCount: Int, with char: Character=" ") -> String {
        let text = description
        return text.leftPadded(to: minCount, with: char)
    }
}
extension Float8 {
/// Runs the built-in self checks and prints a table of all 256 `Float8`
/// values.
///
/// Every check is a `precondition`, so a failing check traps. After the
/// table, the function walks one value up from -240 and another down from
/// 240 in lock step, printing and checking each pair.
static func test() {
// --------------------------------------------------------------------
// See https://forums.swift.org/t/33337/19 and
// https://forums.swift.org/t/33337/23
// for details about this.
precondition(Float8.significandBitCount >= 2) // at least 3 bits, one
// of which may be implicit.
precondition(Float8.exponentBitCount >= 2)
// IEEE-754 imposes the following constraints on the exponent field:
let emin = Float8.leastNormalMagnitude.exponent
let emax = Float8.greatestFiniteMagnitude.exponent
precondition(emin <= emax)
precondition(emax >= 2)
precondition(emin == 1 - emax)
precondition(emax == (1 << (Float8.exponentBitCount - 1)) - 1)
// --------------------------------------------------------------------
// Some other checks:
// If x is leastNonzeroMagnitude, then x.nextDown is +0.0.
// If x is -leastNonzeroMagnitude, then x.nextUp is -0.0.
do {
var x = Float8.leastNonzeroMagnitude.nextDown
precondition(x.isZero && x.sign == .plus)
x = (-Float8.leastNonzeroMagnitude).nextUp
precondition(x.isZero && x.sign == .minus)
}
// significandWidth of the three smallest positive (subnormal) values.
precondition(Float8.leastNonzeroMagnitude.significandWidth == 0)
precondition(Float8.leastNonzeroMagnitude.nextUp.significandWidth == 0)
precondition(Float8.leastNonzeroMagnitude.nextUp.nextUp.significandWidth == 1)
// ulp and binade of zeros and of the smallest subnormals.
precondition(Float8(-Float(0)).ulp > 0)
precondition((0 as Float8).binade == -Float8(0) + -0.0)
precondition(Double(Float8.zero.ulp) ==
Double(sign: .plus,
exponent: 1 - Int(Float8._exponentBias),
significand: 1.0 / Double(1 << Float8.significandBitCount)))
precondition(Float8.leastNonzeroMagnitude.binade * -1 == -Float8.zero.ulp)
precondition(-Float8.leastNonzeroMagnitude.nextUp.binade == -(Float8.zero.ulp * 2 + -0.0))
// Adding half an ulp to the greatest finite magnitude rounds to infinity;
// anything less rounds back to the greatest finite magnitude.
do {
let a = Float8.greatestFiniteMagnitude
let b = a.ulp / 2
precondition(a + b == .infinity)
precondition(a + b.nextDown == a)
precondition(-a - b == -.infinity)
precondition(-a - b.nextDown == -a)
}
// Print all values:
var finCount = 0
var infCount = 0
var nanCount = 0
print(" N Float8 bitPattern exponent significand binade ulp")
print("-------------------------------------------------------------------------------")
for byteValue: UInt8 in .min ... .max {
let v = Float8(bitPattern: byteValue)
// `exponent` uses Int.min / Int.max as sentinels; print them by name.
let expStr: String
switch v.exponent {
case .min: expStr = "Int.min"
case .max: expStr = "Int.max"
default: expStr = v.exponent.description
}
print(
byteValue.leftPadded(to: 4),
v.leftPadded(to: 12),
v.segmentedBinaryString.leftPadded(to: 12),
expStr.leftPadded(to: 9),
v.significand.leftPadded(to: 12),
v.binade.leftPadded(to: 12),
v.ulp.leftPadded(to: 12),
v.isSubnormal ? "subnormal" : v.isNormal ? "normal" : "n/a"
)
if v.isFinite { finCount += 1 }
if v.isNaN { nanCount += 1 }
if v.isInfinite { infCount += 1 }
}
print("Number of finite values:", finCount)
print("Number of infinite values:", infCount)
print("Number of NaNs:", nanCount)
// Every bit pattern must fall into exactly one of the three classes.
precondition(finCount + infCount + nanCount == 256)
print("--")
// Walk w.0 upwards from -240 and w.1 downwards from 240 in lock step,
// until the next step of either one would reach an infinity.
var w: (Float8, Float8) = (-240, 240)
while true {
print(w.0.leftPadded(to: 12),
w.0.segmentedBinaryString.leftPadded(to: 12),
w.1.segmentedBinaryString.leftPadded(to: 12),
w.1.leftPadded(to: 12)
)
precondition(w.0 + w.1 == -0.0) // use -0.0 just to check
precondition((w.0 - w.1).sign == w.0.sign)
precondition(w.0.nextUp.nextDown == w.0)
if w.0.nextUp.isInfinite || w.1.nextDown.isInfinite { break }
w = (w.0.nextUp, w.1.nextDown)
}
// The two walkers must have swapped ends.
precondition(w == (240, -240))
print("--")
}
}
// Run the checks and print the table when the program starts.
Float8.test()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.