Skip to content

Instantly share code, notes, and snippets.

@eruffaldi eruffaldi/posit_one.h
Last active Jan 24, 2019

Embed
What would you like to do?
posit_one.h
// cppPosit 7b7b5ecde436a65923d679d91ddb1b29b711af27
/**
* Emanuele Ruffaldi (C) 2017-2018
*
* My personal bit hip pop using BMI extensions and a bit of constexpr!
*/
#if defined(__SDSVHLS__) && !defined(FPGAHLS)
#define FPGAHLS
#endif
#if defined(__llvm__) && __clang_major__ > 3
#define HAS_bextr_u64
#endif
#include <stdint.h>
#include <bitset>
#include <type_traits>
// CPU detection x86
#if defined(__x86_64) || defined(_M_X64) || defined(_M_IX86) || defined(__i386__)
#define __is_x86_any__
#endif
#ifdef __is_x86_any__
#ifdef _MSC_VER
#include <intrin.h>
#include <immintrin.h>
#else
#include <x86intrin.h>
#endif
#endif
#ifndef CONSTEXPR14
#if __cplusplus >= 201402L
#define CONSTEXPR14 constexpr
#else
#define CONSTEXPR14
#endif
#endif
#if !defined(FPGAHLS) && !defined(_MSC_VER)
#define CLZCONSTEXPR constexpr
#else
#define CLZCONSTEXPR
#endif
// C version
// Expands to a value with the __ONE_COUNT__ least-significant bits of __TYPE__ set.
// The whole expansion is parenthesized so it composes safely with surrounding
// operators (~, *, <<, ==, ...), and a count of zero yields 0 without shifting by
// the full width of the type (which would be undefined behaviour).
// __ONE_COUNT__ is evaluated more than once and must lie in [0, bits of __TYPE__].
#define BIT_MASK(__TYPE__, __ONE_COUNT__) \
(((__ONE_COUNT__) != 0) ? (((__TYPE__)-1) >> ((sizeof(__TYPE__) * 8) - (__ONE_COUNT__))) : ((__TYPE__)0))
/// Returns a value of type R whose `onecount` least-significant bits are set.
///
/// A count of zero yields 0 and a count at or above the width of R yields all
/// ones; both cases are handled explicitly because shifting by the full width
/// (or more) is undefined behaviour and is rejected in constant expressions.
template <typename R>
constexpr R bitmask(unsigned int const onecount)
{
    return onecount == 0
               ? static_cast<R>(0)
               : (onecount >= sizeof(R) * 8
                      ? static_cast<R>(-1)
                      : static_cast<R>(static_cast<R>(-1) >> ((sizeof(R) * 8) - onecount)));
}
// Support function for isprvalconstexpr: takes any expression by forwarding
// reference and returns it by value with the reference stripped from its type.
template <typename T>
constexpr typename std::remove_reference<T>::type makeprval(T &&t)
{
return t;
}
// Expands to noexcept(makeprval(e)).
// NOTE(review): presumably intended as a compile-time test of whether `e` is a
// constant expression, relying on compilers that report a constexpr call with
// constant arguments as non-throwing; newer compilers may always yield false
// here -- confirm against the toolchains in use.
#define isprvalconstexpr(e) noexcept(makeprval(e))
#ifdef _MSC_VER
// note this is not constexpr due to _BitScanReverse
// MSVC stand-in for the GCC/clang __builtin_clz: counts the leading zero bits of
// a 32-bit value by locating the index of its highest set bit.
// NOTE(review): for x == 0 no bit is found and the result is not meaningful;
// callers are expected never to pass 0.
static uint32_t __inline __builtin_clz(uint32_t x)
{
unsigned long r = 0;
// r receives the index (0 = least significant) of the highest set bit of x
_BitScanReverse(&r, x);
return (31-r);
}
#endif
// 64-bit leading-zero count (counterpart of __builtin_clzll) assembled from the
// 32-bit __builtin_clz: when the upper half is empty the answer is 32 plus the
// count of the lower half, otherwise it is the count of the upper half.
// Must not be called with v == 0.
CLZCONSTEXPR inline uint64_t __builtin_clz64(uint64_t v)
{
    return ((v >> 32) == 0 ? 32 + __builtin_clz(v) : __builtin_clz(v >> 32));
}
// findbitleftmostC: number of zero bits above the highest set bit of `input`,
// measured within the width of the argument type (64, 32, 16 or 8 bits).
// These are the constexpr-capable variants (see CLZCONSTEXPR).
// Precondition for every overload: input != 0.

// 64-bit variant
CLZCONSTEXPR inline int findbitleftmostC(uint64_t input)
{
    return static_cast<int>(__builtin_clz64(input));
}
// 32-bit variant
CLZCONSTEXPR inline int findbitleftmostC(uint32_t input)
{
    return static_cast<int>(__builtin_clz(input));
}
// 16-bit variant: widen to 32 bits, then discard the 16 extra leading zeros
CLZCONSTEXPR inline int findbitleftmostC(uint16_t input)
{
    return __builtin_clz(static_cast<uint32_t>(input)) - 16;
}
// 8-bit variant: widen to 32 bits, then discard the 24 extra leading zeros
CLZCONSTEXPR inline int findbitleftmostC(uint8_t input)
{
    return __builtin_clz(static_cast<uint32_t>(input)) - 24;
}
#if 0
#if !defined(__arm__) && !defined(FPGAHLS)
// TODO: make a more reliable detection
// Disabled (#if 0) hardware variants of findbitleftmostC based on the lzcnt
// intrinsics; kept for reference only.
// 8-bit input: counted as a 16-bit value, minus the 8 extra leading zeros.
inline int findbitleftmost(uint8_t input)
{
return __lzcnt16(input) - 8;
}
// 16-bit input
inline int findbitleftmost(uint16_t input)
{
return __lzcnt16(input);
}
// 32-bit input
inline int findbitleftmost(uint32_t input)
{
return __lzcnt32(input);
}
/* CSIM
inline int findbitleftmost(uint64_t input)
{
return __lzcnt64(input);
}
*/
// detect constexpr for X so we can speedup
// Selects the constexpr-capable findbitleftmostC when isprvalconstexpr(X) holds
// and the intrinsic-based function otherwise.
#define findbitleftmost(X) (isprvalconstexpr(X) ? findbitleftmostC(X) : findbitleftmost(X))
#endif
#endif
// indices are with 0 on the right
/// Returns `input` with the `size`-bit field starting at bit `offset` replaced by
/// the low `size` bits of `value` (compile-time offset/size variant).
///
/// The field is cleared at its actual position (mask shifted by `offset`) before
/// the new bits are OR-ed in; clearing the low `size` bits instead would corrupt
/// the result for any non-zero offset.
template <class T, class Y, int offset, int size>
constexpr T bitset_part(T input, Y value)
{
    return (input & ~(bitmask<T>(size) << offset)) | ((value & bitmask<T>(size)) << offset);
}
/// Returns `input` with the `size`-bit field starting at bit `offset` replaced by
/// the low `size` bits of `value` (run-time offset/size variant).
///
/// The field is cleared at its actual position (mask shifted by `offset`) before
/// the new bits are OR-ed in.
template <class T, class Y>
CONSTEXPR14 T bitset_part(T input, Y value, int offset, int size)
{
    auto M = bitmask<T>(size);
    return (input & ~(M << offset)) | ((value & M) << offset);
}
/// Extracts the `size`-bit field that starts at bit `offset` of `input`
/// (bit 0 is the rightmost bit); offset and size are template arguments.
template <class T, int offset, int size>
constexpr T bitset_get(T input)
{
    return bitmask<T>(size) & (input >> offset);
}
/// Extracts the `size`-bit field that starts at bit `offset` of `input`
/// (bit 0 is the rightmost bit); offset and size are run-time arguments.
template <class T>
CONSTEXPR14 T bitset_get(T input, int offset, int size)
{
    return (input >> offset) & bitmask<T>(size);
}
#if defined(__is_x86_any__) && !defined(FPGAHLS) && defined(__BMI__)
/* CSIM
inline uint64_t bitset_gethw(uint64_t input, int offset, int size)
{
return _bextr_u64(input, offset, size);
}
*/
// Bit-field extraction through the BMI bextr instruction, 16-bit input:
// returns `size` bits of `input` starting at bit `offset`.
inline uint16_t bitset_gethw(uint16_t input, int offset, int size)
{
    return static_cast<uint16_t>(_bextr_u32(input, offset, size));
}
// Bit-field extraction through the BMI bextr instruction, 32-bit input.
inline uint32_t bitset_gethw(uint32_t input, int offset, int size)
{
    return static_cast<uint32_t>(_bextr_u32(input, offset, size));
}
// bitset_gethwT: BMI bextr based bit-field extraction with offset and size given
// as template arguments; one overload per unsigned width.
//#ifdef HAS_bextr_u64
// 64-bit input, uses the 64-bit form of the intrinsic
template <int offset, int size>
uint64_t bitset_gethwT(uint64_t input)
{
    return static_cast<uint64_t>(_bextr_u64(input, offset, size));
}
//#endif
// 32-bit input
template <int offset, int size>
uint32_t bitset_gethwT(uint32_t input)
{
    return static_cast<uint32_t>(_bextr_u32(input, offset, size));
}
// 16-bit input, widened to the 32-bit intrinsic and narrowed back
template <int offset, int size>
uint16_t bitset_gethwT(uint16_t input)
{
    return static_cast<uint16_t>(_bextr_u32(input, offset, size));
}
// 8-bit input, widened to the 32-bit intrinsic and narrowed back
template <int offset, int size>
uint8_t bitset_gethwT(uint8_t input)
{
    return static_cast<uint8_t>(_bextr_u32(input, offset, size));
}
// with template
// bitset_getT(X, A, B): extract B bits of X starting at bit A, A and B being
// compile-time constants. With BMI available this chooses between the bextr
// based bitset_gethwT and the portable bitset_get template.
// NOTE(review): the hardware version is selected when isprvalconstexpr(X) is
// true, which is the opposite of the disabled findbitleftmost macro (constant
// input -> constexpr-capable function). Confirm which order is intended before
// relying on this in constant expressions.
#define bitset_getT(X, A, B) (isprvalconstexpr(X) ? bitset_gethwT<A, B>(X) : bitset_get<decltype(X), A, B>(X))
// with flexible arguments
// Function-like macro shadowing the three-argument bitset_get function; the inner
// bitset_get(X, A, B) is not re-expanded and therefore calls the function.
#define bitset_get(X, A, B) (isprvalconstexpr(X) && isprvalconstexpr(A) && isprvalconstexpr(B) ? bitset_gethw(X, A, B) : bitset_get(X, A, B))
#else
// Without BMI only the portable template implementation is used.
#define bitset_getT(X, A, B) (bitset_get<decltype(X), A, B>(X))
#endif
/// Function object returning the N leftmost (most significant) bits of a value
/// of type T, right-aligned in the result. The value is reinterpreted as the
/// corresponding unsigned type before extraction.
template <class T, int N>
struct bitset_leftmost_get_const
{
    constexpr T operator()(T X) const
    {
        using unsigned_t = typename std::make_unsigned<T>::type;
        return static_cast<T>(bitset_getT(static_cast<unsigned_t>(X), sizeof(T) * 8 - N, N));
    }
};
/// N == 0: there is nothing to extract, the result is always 0.
template <class T>
struct bitset_leftmost_get_const<T, 0>
{
    constexpr T operator()(T) const
    {
        return 0;
    }
};
// Extracts the N leftmost bits of X (N must be a compile-time constant).
// N is parenthesized so that expressions such as `a + b` expand correctly.
#define bitset_leftmost_getT(X, N) bitset_getT(X, sizeof(X) * 8 - (N), (N))
/// Returns all bits of `x` as a std::bitset as wide as T.
template <class T>
constexpr std::bitset<sizeof(T) * 8> bitset_all(const T x)
{
    return std::bitset<8 * sizeof(T)>(static_cast<unsigned long long>(x));
}
/// Returns the N most significant bits of `x` as a std::bitset<N>.
template <class T, int N>
constexpr std::bitset<N> bitset_msb(const T x)
{
    return std::bitset<N>(static_cast<unsigned long long>(x >> (sizeof(T) * 8 - N)));
}
/// Returns the N least significant bits of `x` as a std::bitset<N>.
template <class T, int N>
constexpr std::bitset<N> bitset_lsb(const T x)
{
    return std::bitset<N>(static_cast<unsigned long long>(x));
}
// support for pcabs: generic fallback (unsigned and non-integral types)
template <class T, bool is_signed_integral = std::is_integral<T>::value && std::is_signed<T>::value>
struct pcabs_impl_
{
    static constexpr T apply(T x)
    {
        return x < 0 ? -x : x;
    }
};
// support for pcabs: signed integers negate through the unsigned counterpart so
// that the most negative value does not trigger signed-overflow undefined
// behaviour; that value maps to itself, and converting the result to the
// unsigned type yields the correct magnitude.
template <class T>
struct pcabs_impl_<T, true>
{
    using unsigned_t = typename std::make_unsigned<T>::type;
    static constexpr T apply(T x)
    {
        return x < 0 ? static_cast<T>(static_cast<unsigned_t>(0) - static_cast<unsigned_t>(x)) : x;
    }
};
/// absolute value of signed integer with conditions
///
/// Returns |x|. For the minimum value of a signed integer type the result is
/// that same value (its magnitude is not representable in T); assigning the
/// result to the matching unsigned type gives the exact magnitude.
template <class T>
constexpr T pcabs(T x)
{
    return pcabs_impl_<T>::apply(x);
}
///
/// absolute value of signed integer without conditions
///
/// Branch-free form: `sign` is the sign bit replicated over the whole word
/// (arithmetic right shift), so (x + sign) ^ sign negates x when it is negative.
template <class T> // ,typename std::enable_if<std::is_integral<T>::value ,int>::type* = nullptr>
CONSTEXPR14 T pabs(T x)
{
    const T sign = (x >> (sizeof(T) * 8 - 1));
    return (x + sign) ^ sign;
}
// support: primary template, selected through the two partial specializations
template <class A, int abits, class B, int bbits, bool abits_gt_bbits>
struct cast_msb_
{
};
// support: source wider than destination (#A > #B), the excess low bits are dropped
template <class A, int abits, class B, int bbits>
struct cast_msb_<A, abits, B, bbits, true>
{
    constexpr B operator()(A value) const
    {
        return static_cast<B>(value >> (abits - bbits));
    }
};
// support: destination at least as wide as source (#B >= #A), value is moved up
template <class A, int abits, class B, int bbits>
struct cast_msb_<A, abits, B, bbits, false>
{
    constexpr B operator()(A value) const
    {
        return static_cast<B>(value) << (bbits - abits);
    }
};
/// MSB aligned data cast from type A (abits wide) to type B (bbits wide):
/// the most significant bits are preserved, both types must be unsigned.
template <class A, int abits, class B, int bbits>
struct cast_msb : public cast_msb_<A, abits, B, bbits, (abits > bbits)>
{
    static_assert(std::is_unsigned<A>::value, "required unsigned A");
    static_assert(std::is_unsigned<B>::value, "required unsigned B");
};
// support: primary template, selected through the two partial specializations
template <class A, int abits, class B, int bbits, bool abits_gt_bbits>
struct cast_right_to_left_
{
};
// support: #A > #B, keep the low abits of the input and drop the excess low bits
template <class A, int abits, class B, int bbits>
struct cast_right_to_left_<A, abits, B, bbits, true>
{
    constexpr B operator()(A value) const
    {
        return static_cast<B>(bitset_getT(value, 0, abits) >> (abits - bbits));
    }
};
// support: #B >= #A, keep the low abits of the input and move them to the top of
// the bbits-wide result
template <class A, int abits, class B, int bbits>
struct cast_right_to_left_<A, abits, B, bbits, false>
{
    constexpr B operator()(A value) const
    {
        return static_cast<B>(bitset_getT(value, 0, abits)) << (bbits - abits);
    }
};
/// Cast of the abits rightmost bits of an A into a bbits-wide, left-aligned
/// value of type B; both types must be unsigned.
template <class A, int abits, class B, int bbits>
struct cast_right_to_left : public cast_right_to_left_<A, abits, B, bbits, (abits > bbits)>
{
    static_assert(std::is_unsigned<A>::value, "required unsigned A");
    static_assert(std::is_unsigned<B>::value, "required unsigned B");
};
/// Describes a signed fixed-point format stored in the integer type T:
/// N total bits of which F are fraction bits.
///
/// The holding type must offer at least N bits; the check compares the width of
/// T against N in the direction stated by its message (a T narrower than N is
/// rejected, a T wider than or equal to N is accepted).
template <class T, int N, int F>
struct fixedtrait
{
    static_assert(sizeof(T)*8 >= N,"fixedtrait holding type is too small");
    static_assert(N > 0,"fixedtrait total bits should be positive");
    static_assert(F <= N && F >= 0,"fraction bits should not exceed N and not be negative");
    static_assert(std::is_integral<T>::value && std::is_signed<T>::value,"only for signed integrals");
    using value_t = T;
    static constexpr int totalbits = N;
    static constexpr int fraction_bits = F;
};
#if 0
#if __cplusplus >= 201402L
// Disabled (#if 0) partial specialization for plain integers: total bits equal
// the full width of T and there are no fraction bits.
template <class T>
struct fixedtrait<T,sizeof(T)*8,0>
{
static_assert(std::is_integral<T>::value && std::is_signed<T>::value,"only for signed integrals");
using value_t = T;
static constexpr int totalbits = sizeof(T)*8;
static constexpr int fraction_bits = 0;
};
#endif
#endif
#include <limits>
#include <cstdint>
#if defined(__SDSVHLS__) && !defined(FPGAHLS)
#define FPGAHLS
#endif
// Thin wrapper that gives a distinct C++ type to the raw storage of a custom
// float format; `what` holds the raw bits.
template <class T>
struct valuewrap
{
    T what;
    // default: all bits zero
    constexpr valuewrap() : what(0) {}
    // wrap the given raw value
    constexpr valuewrap(T w) : what(w) {}
};
// Tag type for a 16-bit float (paired with half_trait by float2trait); inherits
// the constructors of its 16-bit wrapper.
struct halffloat : valuewrap<uint16_t>
{
    using valuewrap<uint16_t>::valuewrap;
};
// Tag type for the alternative 16-bit float (paired with half_traitalt by
// float2trait); inherits the constructors of its 16-bit wrapper.
struct halffloatalt : valuewrap<uint16_t>
{
    using valuewrap<uint16_t>::valuewrap;
};
// Tag type for an 8-bit float (used by microfloat_trait); inherits the
// constructors of its 8-bit wrapper.
struct microfloat : valuewrap<uint8_t>
{
    using valuewrap<uint8_t>::valuewrap;
};
/// Trait describing a float layout of 1 sign bit, exp_bits exponent bits and
/// frac_bits fraction bits, with all notable bit patterns derived from those.
/// holder_T is an unsigned integer capable of storing 1+exp_bits+frac_bits exactly
/// value_T is the struct or native type used for this
template <int exp_bits, int frac_bits, class value_T, class holder_T, bool with_denorm_ = true>
struct any_floattrait
{
    using value_t = value_T;
    using holder_t = holder_T;
    static_assert(sizeof(holder_t) * 8 == 1 + exp_bits + frac_bits, "holding size");
    //static_assert<!std::is_signed<holder_t> >;

    // layout
    static constexpr int data_bits = 1 + exp_bits + frac_bits;
    static constexpr int exponent_bits = exp_bits;
    static constexpr int fraction_bits = frac_bits;
    static constexpr int exponent_bias = (1 << (exp_bits - 1)) - 1;
    static constexpr int exponent_max = (1 << exp_bits) - 2;
    static constexpr int with_denorm = with_denorm_;
    static constexpr uint32_t exponent_mask = (1 << exponent_bits) - 1;

    // notable bit patterns; the notation x[n] means bit x repeated n times
    enum : holder_t {
        signbit = static_cast<holder_t>(1) << (data_bits - 1),
        // 0 1[e] 0[f]
        pinfinity_h = bitmask<holder_t>(exp_bits) << frac_bits,
        // 1 1[e] 0[f]
        ninfinity_h = pinfinity_h | signbit,
        // many NaN encodings are possible, this one has only the top fraction bit set: 0 1[e] 1 0[f-1]
        nan_h = bitmask<holder_t>(exp_bits + 1) << (frac_bits - 1),
        // 0 0 1[e-1] 0[f]
        one_h = bitmask<holder_t>(exp_bits - 1) << frac_bits,
        // 0 0 1[e-1] 0[f-1] 1
        afterone_h = one_h + 1,
        // 0 1[e-1] 0 1[f]
        max_h = (bitmask<holder_t>(exp_bits - 1) << (frac_bits + 1)) | bitmask<holder_t>(frac_bits),
        lowest_h = max_h | signbit,
        // 0 0[e-1] 1 0[f]
        min_h = static_cast<holder_t>(1) << frac_bits,
        // 0 1 0[e-1+f]
        two_h = static_cast<holder_t>(1) << (exp_bits - 1 + frac_bits)
    };
};
// pulp8 alternative
// 8-bit float: 1 sign, 5 exponent and 2 fraction bits
using microfloat_trait = any_floattrait<5,2,microfloat,uint8_t>;
// PULP 8E,7M vs classic 5E,10P
// 16-bit float with 8 exponent and 7 fraction bits (denormals enabled)
using half_traitalt = any_floattrait<8,7,halffloatalt,uint16_t>;
// Intel bfloat16 as 8,7 without denormals
// Same layout and value type as half_traitalt, only with_denorm differs
using bfloat16_trait = any_floattrait<8,7,halffloatalt,uint16_t, false>;
// https://en.wikipedia.org/wiki/16-bit
// Trait for the classic half precision layout: 1 sign, 5 exponent, 10 fraction bits.
struct half_trait // : public any_floattrait<5,10,halffloat,uint16_t>
{
    using value_t = halffloat;
    using holder_t = uint16_t;

    // layout
    static constexpr int data_bits = 16;     // can be derived from value_t
    static constexpr int exponent_bits = 5;
    static constexpr int fraction_bits = 10; // can be derived from data_bits and exponent_bits
    static constexpr int exponent_bias = 15;
    static constexpr int exponent_max = 30;  // can be derived from bias and bits
    static constexpr int with_denorm = true;
    static constexpr holder_t signbit = static_cast<holder_t>(1) << (data_bits - 1);
    static constexpr uint32_t exponent_mask = (1 << exponent_bits) - 1; // TODO make it from exponent_bits

    // notable bit patterns
    static constexpr holder_t pinfinity_h = 0x7C00;
    static constexpr holder_t ninfinity_h = 0xFC00;
    static constexpr holder_t nan_h = 0x7E00;
    static constexpr holder_t one_h = 0x3C00; // the value after one is just one_h + 1
    static constexpr holder_t two_h = 0x4000;
    static constexpr holder_t max_h = 0x7BFF;
    static constexpr holder_t min_h = 0x0400; // smallest normal value
    static constexpr holder_t lowest_h = 0xFBFF; // -max_h
    // max subnormal 0 00000 1111111111 = 2^-14 - 2^-24, about 6.09756e-5
    // min subnormal 0 00000 0000000001 = 2^-24, about 5.96046e-8
};
// https://en.wikipedia.org/wiki/Single-precision_floating-point_format
// Trait for IEEE single precision: 1 sign, 8 exponent, 23 fraction bits.
struct single_trait
{
    using holder_t = uint32_t;
#ifndef FPGAHLS
    // native value type and native constants, not available when building for HLS
    using value_t = float;
    static constexpr value_t zero = 0.0f;
    static constexpr value_t ninfinity = -std::numeric_limits<value_t>::infinity();
    static constexpr value_t pinfinity = std::numeric_limits<value_t>::infinity();
#endif

    // layout
    static constexpr int data_bits = 32;     // can be derived from value_t
    static constexpr int exponent_bits = 8;
    static constexpr int fraction_bits = 23; // can be derived from data_bits and exponent_bits
    static constexpr int exponent_bias = 127;
    static constexpr int exponent_max = 254; // can be derived from bias and bits
    static constexpr int with_denorm = true;
    static constexpr holder_t signbit = static_cast<holder_t>(1) << (data_bits - 1);
    static constexpr uint32_t exponent_mask = (1 << exponent_bits) - 1; // TODO make it from exponent_bits

    // notable bit patterns
    static constexpr holder_t pinfinity_h = 0x7f800000;
    static constexpr holder_t ninfinity_h = 0xff800000;
    static constexpr holder_t nan_h = 0x7fc00000;
    static constexpr holder_t one_h = 0x3f800000;
    static constexpr holder_t two_h = 0x40000000;
    static constexpr holder_t max_h = 0x7f7fffff;
    static constexpr holder_t min_h = 0x00800000;     // smallest normal value
    static constexpr holder_t denorm_min_h = 1;       // just 1 after 0
    static constexpr holder_t epsilon_h = 0x34000000; // 2^-23
    static constexpr holder_t lowest_h = 0xff7fffff;  // -max_h
};
// https://en.wikipedia.org/wiki/Double-precision_floating-point_format
// Trait for IEEE double precision: 1 sign, 11 exponent, 52 fraction bits.
// Offers the same set of constants as single_trait, including denorm_min_h and
// epsilon_h.
struct double_trait
{
#ifndef FPGAHLS
    using value_t = double;
#endif
    using holder_t = uint64_t;
#ifndef FPGAHLS
    static constexpr value_t zero = 0.0;
    static constexpr value_t ninfinity = -std::numeric_limits<value_t>::infinity();
    static constexpr value_t pinfinity = std::numeric_limits<value_t>::infinity();
#endif
    static constexpr holder_t ninfinity_h = 0xfff0000000000000ULL;
    static constexpr holder_t pinfinity_h = 0x7ff0000000000000ULL;
    static constexpr holder_t nan_h = 0x7ff8000000000000ULL;
    static constexpr holder_t one_h = 0x3ff0000000000000ULL;
    static constexpr holder_t two_h = 0x4000000000000000ULL;
    static constexpr holder_t max_h = 0x7fefffffffffffffULL;     // largest finite value
    static constexpr holder_t lowest_h = 0xffefffffffffffffULL;  // -max_h
    static constexpr holder_t min_h = 0x0010000000000000ULL;     // smallest normal value
    static constexpr holder_t denorm_min_h = 1;                  // just 1 after 0
    static constexpr holder_t epsilon_h = 0x3cb0000000000000ULL; // 2^-52
    static constexpr int with_denorm = true;
    static constexpr int data_bits = 64; // can be derived from value_t
    static constexpr int exponent_bits = 11;
    static constexpr int fraction_bits = 52; // can be derived from data_bits and exponent_bits
    static constexpr int exponent_bias = 1023;
    static constexpr int exponent_max = 2046;
    static constexpr holder_t signbit = ((holder_t)(1))<<(data_bits-1);
    static constexpr uint32_t exponent_mask = (((holder_t)(1))<<exponent_bits)-1; // 7FF
};
#ifdef FLT128_MAX
// https://en.wikipedia.org/wiki/Quadruple-precision_floating-point_format
// https://gcc.gnu.org/onlinedocs/gcc/Floating-Types.html
// Trait for IEEE quadruple precision: 1 sign, 15 exponent, 112 fraction bits.
// All bit patterns are built from the layout constants with shifts on holder_t,
// since 128-bit values cannot be written as integer literals.
struct float128_trait
{
#ifndef FPGAHLS
    using value_t = __float128;
#endif
    using holder_t = unsigned __int128;
#ifndef FPGAHLS
    static constexpr value_t zero = 0.0;
    // NOTE(review): these rely on std::numeric_limits being specialized for
    // __float128 by the standard library in use -- confirm for the target toolchain.
    static constexpr value_t ninfinity = -std::numeric_limits<value_t>::infinity();
    static constexpr value_t pinfinity = std::numeric_limits<value_t>::infinity();
#endif
    static constexpr int with_denorm = true;
    static constexpr int data_bits = 128; // can be derived from value_t
    static constexpr int exponent_bits = 15;
    static constexpr int fraction_bits = 112; // can be derived from data_bits and exponent_bits
    static constexpr int exponent_bias = 16383;
    static constexpr int exponent_max = 16383*2;
    // the shift is performed on holder_t: shifting a plain int by 127 overflows
    static constexpr holder_t signbit = ((holder_t)(1)) << (data_bits-1);
    static constexpr uint32_t exponent_mask = (1<<exponent_bits)-1; // 7FFF
    static constexpr holder_t pinfinity_h = ((holder_t)0x7fff) << fraction_bits; // 0 1[15] 0[112]
    static constexpr holder_t ninfinity_h = pinfinity_h | signbit;               // 1 1[15] 0[112]
    static constexpr holder_t nan_h = pinfinity_h | (((holder_t)1) << (fraction_bits-1)); // 0 1[15] 1 0[111]
    static constexpr holder_t one_h = ((holder_t)exponent_bias) << fraction_bits;         // exponent 0x3fff
    static constexpr holder_t two_h = ((holder_t)(exponent_bias+1)) << fraction_bits;     // exponent 0x4000
    static constexpr holder_t max_h = (((holder_t)exponent_max) << fraction_bits) | ((((holder_t)1) << fraction_bits) - 1); // 0 1[14] 0 1[112]
    static constexpr holder_t lowest_h = max_h | signbit; // -max_h
    static constexpr holder_t min_h = ((holder_t)1) << fraction_bits; // smallest normal value
};
#endif
/**
Casting between different arbitrary float formats requires:
- exponent:
oldexponent - oldbias +newbias
- fraction assumed to be left aligned
nothing just keep left aligned, only newfraction bits be used
See:
template <class FT,class ET>
template <class Trait>
CONSTEXPR14 typename Trait::holder_t Unpacked<FT,ET>::pack_xfloati() const
template <class Trait, typename = typename std::enable_if<!std::is_integral<typename Trait::value_t>::value>::type>
explicit CONSTEXPR14 Unpacked(typename Trait::holder_t i) { unpack_xfloat<Trait>(i); }
*/
/// Maps a float value type to its trait: `type` repeats the value type and
/// `trait` names the matching trait struct. The primary template is empty, so
/// unsupported types expose no members.
template <class T>
struct float2trait
{
};
// float -> single_trait
template <>
struct float2trait<float>
{
    typedef float type;
    typedef single_trait trait;
};
// double -> double_trait
template <>
struct float2trait<double>
{
    typedef double type;
    typedef double_trait trait;
};
// halffloat -> half_trait
template <>
struct float2trait<halffloat>
{
    typedef halffloat type;
    typedef half_trait trait;
};
// halffloatalt -> half_traitalt
template <>
struct float2trait<halffloatalt>
{
    typedef halffloatalt type;
    typedef half_traitalt trait;
};
#ifdef FLT128_MAX
// __float128 -> float128_trait (only when FLT128_MAX is defined)
template <>
struct float2trait<__float128>
{
    typedef __float128 type;
    typedef float128_trait trait;
};
#endif
#if 0
// Disabled (#if 0) sketch of a std::numeric_limits-like helper parameterized on
// a trait.
// NOTE(review): refers to PT and withnan, which are not declared in this block,
// and to static member functions (min, max, one, ...) that the traits visible in
// this file do not provide -- it would need rework before being enabled.
template <class Trait>
struct limithelper
{
using T=Trait;
static constexpr bool is_specialized = true;
static constexpr T min() noexcept { return T::min(); }
static constexpr T max() noexcept { return T::max(); }
static constexpr T lowest() noexcept { return T::lowest (); }
//static constexpr int digits = 0; number of digits (in radix base) in the mantissa
//static constexpr int digits10 = 0;
static constexpr bool is_signed = true;
static constexpr bool is_integer = false;
static constexpr bool is_exact = false;
static constexpr int radix = 2;
// epsilon is defined as the distance between one and the next value
static constexpr T epsilon() noexcept { return T::one_next()-T::one(); }
//static constexpr T round_error() noexcept { return T(); }
// this is also the maximum integer
static constexpr int min_exponent = PT::minexponent();
// static constexpr int min_exponent10 = 0;
static constexpr int max_exponent = PT::maxexponent();
//static constexpr int max_exponent10 = 0;
static constexpr bool has_infinity = true;
static constexpr bool has_quiet_NaN = withnan;
static constexpr bool has_signaling_NaN = false;
//static constexpr float_denorm_style has_denorm = denorm_absent;
static constexpr bool has_denorm_loss = false;
static constexpr T infinity() noexcept { return T::infinity(); }
static constexpr T quiet_NaN() noexcept { return T::nan(); }
//static constexpr T signaling_NaN() noexcept { return T(); }
static constexpr T denorm_min() noexcept { return T::min(); }
static constexpr bool is_iec559 = false;
static constexpr bool is_bounded = false;
static constexpr bool is_modulo = false;
static constexpr bool traps = false;
static constexpr bool tinyness_before = false;
};
#endif
#if 0
// Disabled (#if 0) hook-up of limithelper for the custom float tag types.
// NOTE(review): specializes std::limits, not std::numeric_limits -- presumably
// the latter was meant; confirm before enabling.
namespace std
{
template <>
struct limits<halffloat>: public limithelper<half_trait>
{
};
template <>
struct limits<halffloatalt>: public limithelper<half_traitalt>
{
};
template <>
struct limits<microfloat>: public limithelper<microfloat_trait>
{
};
}
#endif
/**
* Various Integer Type Helpers
*
* Emanuele Ruffaldi 2017
*/
#include <cstdint>
#include <inttypes.h>
#if (defined(__llvm__) && __clang_major__ > 3) || (defined(__GNUC__) && __GNUC__ >= 4)
#define HAS128T
#endif
#ifdef HAS128T
#define int128_t __int128_t
#endif
/// Yields the larger (by sizeof) of the two types; A is chosen when both have
/// the same size.
template <class A, class B>
using largest_type = typename std::conditional<(sizeof(B) > sizeof(A)), B, A>::type;
/// Given a size in bits, selects the smallest signed integer type of at least that size.
/// Equivalent to: http://www.boost.org/doc/libs/1_48_0/libs/integer/doc/html/boost_integer/integer.html#boost_integer.integer.sized
namespace detail_least
{
    // Category = how many of the thresholds 8/16/32/64/128 the requested size
    // fits into; a higher category therefore means a smaller type.
    template <int Category>
    struct int_least_helper
    {
    };
#ifdef HAS128T
    template <>
    struct int_least_helper<1>
    {
        using least = int128_t;
    };
#endif
    template <>
    struct int_least_helper<2>
    {
        using least = int64_t;
    };
    template <>
    struct int_least_helper<3>
    {
        using least = int32_t;
    };
    template <>
    struct int_least_helper<4>
    {
        using least = int16_t;
    };
    template <>
    struct int_least_helper<5>
    {
        using least = int8_t;
    };
}
/// int_least_bits<N>::type is the signed integer type selected for N bits.
template <unsigned int N>
struct int_least_bits
{
    using type = typename detail_least::int_least_helper<((N) <= 8) + ((N) <= 16) + ((N) <= 32) + ((N) <= 64) + ((N) <= 128)>::least;
};
/// Helper working around the fact that int8_t and uint8_t are printed as
/// characters by iostream: `type` is a type that prints as a number.
/// Every type maps to itself unless specialized below.
template <class T>
struct printableinttype
{
    typedef T type;
};
/// const-qualified types map to the const version of the unqualified mapping.
template <class T>
struct printableinttype<const T>
{
    typedef typename printableinttype<T>::type typex;
    typedef const typex type;
};
/// uint8_t prints through uint16_t
template <>
struct printableinttype<uint8_t>
{
    typedef uint16_t type;
};
/// int8_t prints through int16_t
template <>
struct printableinttype<int8_t>
{
    typedef int16_t type;
};
/// Maps an integer type to the next larger one of the same signedness
/// (8 -> 16 -> 32 -> 64, and 64 -> 128 when HAS128T is defined).
/// The primary template is empty: types without a successor expose no `type`.
template <class T>
struct nextinttype
{
};

// unsigned chain
template <>
struct nextinttype<uint8_t>
{
    typedef uint16_t type;
};
template <>
struct nextinttype<uint16_t>
{
    typedef uint32_t type;
};
template <>
struct nextinttype<uint32_t>
{
    typedef uint64_t type;
};
#ifdef HAS128T
template <>
struct nextinttype<uint64_t>
{
    typedef __uint128_t type;
};
#endif

// signed chain
template <>
struct nextinttype<int8_t>
{
    typedef int16_t type;
};
template <>
struct nextinttype<int16_t>
{
    typedef int32_t type;
};
template <>
struct nextinttype<int32_t>
{
    typedef int64_t type;
};
#ifdef HAS128T
template <>
struct nextinttype<int64_t>
{
    typedef __int128_t type;
};
#endif
/**
* Emanuele Ruffaldi (C) 2017-2018
*
* cppPosit project
* generalized soft float in unpacked form
*
*/
#include <cstdint>
#ifndef FPGAHLS
#include <iostream>
#endif
#include <inttypes.h>
#include <math.h>
#include <bitset>
#include <limits>
#include <ratio>
#if !defined(FPGAHLS) && defined(HAS128T)
/// Streams a 128-bit signed integer as decimal text.
///
/// The standard stream operators have no overload for __int128_t, so the digits
/// are produced here. The magnitude is computed in the unsigned 128-bit type so
/// that the most negative value is handled without overflow. The stream's base
/// flags (hex/oct) are not taken into account.
inline std::ostream &operator<<(std::ostream &ons, __int128_t x)
{
    // 39 digits are enough for 2^127; leave room for the sign and terminator
    char digits[48];
    char *cursor = digits + sizeof(digits) - 1;
    *cursor = '\0';
    const bool negative = x < 0;
    __uint128_t magnitude = negative ? (~static_cast<__uint128_t>(x) + 1) : static_cast<__uint128_t>(x);
    // emit digits from least to most significant, filling the buffer backwards
    do
    {
        *--cursor = static_cast<char>('0' + static_cast<int>(magnitude % 10));
        magnitude /= 10;
    } while (magnitude != 0);
    if (negative)
    {
        *--cursor = '-';
    }
    ons << cursor;
    return ons;
}
#endif
/// Limits `v` to the closed range [lo, hi]: returns lo when v is below it, hi
/// when v is above it and v itself otherwise. The result is a reference to one
/// of the three arguments.
template <class T>
constexpr const T &clamp(const T &v, const T &lo, const T &hi)
{
    return !(v < lo) ? (v > hi ? hi : v) : lo;
}
/// Integer division rounding toward negative infinity (floor(a / b)).
///
/// The built-in `/` truncates toward zero and `%` takes the sign of the
/// dividend, so the truncated quotient must be lowered by one exactly when the
/// division is inexact and the remainder and divisor have opposite signs. This
/// covers negative divisors as well as negative dividends. b must not be zero.
template <class T>
constexpr T FLOORDIV(T a, T b)
{
    return ((a) / (b) - ((((a) % (b)) != 0) && ((((a) % (b)) < 0) != ((b) < 0))));
}
/// Generalized software float in unpacked form.
///
/// A value is a class tag (Regular, Infinity, NaN, Zero), a sign flag, a signed
/// exponent and a fraction. For Regular values the fraction is left aligned and
/// the leading 1 is implicit: exponent 0 with fraction 0 is the value one.
///
/// FT is the unsigned type holding the fraction, ET the signed type holding the
/// exponent.
template <class FT = uint64_t, class ET = int32_t>
struct Unpacked
{
    static_assert(std::is_unsigned<FT>::value,
                  "Unpacked requires unsigned fractiont type");
    static_assert(std::is_signed<ET>::value,
                  "Unpacked requires signed exponent type");
    using POSIT_LUTYPE = FT;
    enum
    {
        FT_bits = sizeof(FT) * 8 ///< width of the fraction type in bits
    };
    enum : FT
    {
        FT_leftmost_bit = (((FT)1) << (FT_bits - 1)) ///< most significant bit of FT
    };
#ifndef UnpackedDualSel
// combines the Type of two operands into one switch selector
#define UnpackedDualSel(a, b) ((a) + (b)*4)
#endif
    enum Type
    {
        Regular,
        Infinity,
        NaN,
        Zero
    }; /// signed infinity and nan require the extra X bit
    Type type = Regular;
    bool negativeSign = false;
    ET exponent = 0; // with sign
    FT fraction = 0; // this can be 52bit for holding double.
    struct single_tag
    {
    };
    /// Builds from the raw bits of a single precision float.
    explicit CONSTEXPR14 Unpacked(single_tag, uint32_t p)
    {
        unpack_xfloati<single_trait>(p);
    }
    /// Default: Regular, positive, exponent 0, fraction 0 (the value one).
    explicit constexpr Unpacked() {}
    /// Normalizes a Regular value whose fraction still contains its leading 1
    /// (denormalized form): the leading 1 is located, shifted out to become the
    /// implicit bit and the exponent is reduced by the number of leading zeros.
    /// A zero fraction yields Zero.
    // assume regular
    CONSTEXPR14 Unpacked normalized() const
    {
        if (fraction == 0)
        {
            return Unpacked(Zero, false);
        }
        else
        {
            int k = findbitleftmostC(fraction);
            // shift in two steps: when only the lowest bit is set k + 1 equals
            // the width of FT and a single shift would be undefined
            return Unpacked(exponent - k,
                            static_cast<FT>(static_cast<FT>(fraction << k) << 1),
                            negativeSign);
        }
    }
    explicit CONSTEXPR14 Unpacked(halffloat p) { unpack_half(p); }
    explicit CONSTEXPR14 Unpacked(int i) { unpack_int(i); }
    /// Builds a value of the given class (typically Infinity, NaN or Zero).
    explicit CONSTEXPR14 Unpacked(Type t, bool anegativeSign = false)
        : type(t), negativeSign(anegativeSign) {}
    /// Builds from a fixed-point value described by Trait.
    /// NOTE(review): Trait appears only in a non-deduced context, so it is
    /// unclear how this constructor can be selected -- confirm intended usage.
    template <class Trait, typename = typename std::enable_if<std::is_integral<
                               typename Trait::value_t>::value>::type>
    explicit CONSTEXPR14 Unpacked(typename Trait::value_t i)
    {
        unpack_xfixed<Trait>(i);
    }
    /// Builds from a float described by Trait.
    /// NOTE(review): takes Trait::holder_t but forwards to unpack_xfloat, which
    /// expects Trait::value_t; same non-deduced Trait remark as above.
    template <class Trait, typename = typename std::enable_if<!std::is_integral<
                               typename Trait::value_t>::value>::type>
    explicit CONSTEXPR14 Unpacked(typename Trait::holder_t i)
    {
        unpack_xfloat<Trait>(i);
    }
    /// Builds a Regular value from its parts; the fraction is given without the
    /// implicit leading 1.
    // expect 1.xxxxxx otherwise make it 0.xxxxxxxxx
    explicit CONSTEXPR14 Unpacked(ET aexponent, FT afraction, bool anegativeSign)
        : type(Regular),
          negativeSign(anegativeSign),
          exponent(aexponent),
          fraction(afraction) {}
#ifndef FPGAHLS
    explicit CONSTEXPR14 Unpacked(float p)
    {
        unpack_float(p);
    }
    explicit CONSTEXPR14 Unpacked(double p) { unpack_double(p); }
    CONSTEXPR14 Unpacked &unpack_float(float f)
    {
        return unpack_xfloat<single_trait>(f);
    }
    CONSTEXPR14 Unpacked &unpack_double(double d)
    {
        return unpack_xfloat<double_trait>(d);
    }
    constexpr operator float() const { return pack_xfloat<single_trait>(); }
    constexpr operator double() const { return pack_xfloat<double_trait>(); }
#endif
    CONSTEXPR14 Unpacked &unpack_half(halffloat d)
    {
        return unpack_xfloat<half_trait>(d);
    }
    CONSTEXPR14 Unpacked &unpack_int(int i)
    {
        return unpack_xfixed<fixedtrait<int, sizeof(int) * 8, 0>>(i);
    }
    constexpr operator halffloat() const { return pack_xfloat<half_trait>(); }
    constexpr operator int() const
    {
        return pack_xfixed<fixedtrait<int, sizeof(int) * 8, 0>>();
    }
    /// packs into the raw bits of the float format described by Trait
    template <class Trait>
    CONSTEXPR14 typename Trait::holder_t pack_xfloati() const;
    /// packs into the fixed-point format described by Trait
    template <class Trait>
    CONSTEXPR14 typename Trait::value_t pack_xfixed() const;
    /// Packs into the value type of Trait by reinterpreting the packed bits
    /// through a union of the holder and value types.
    template <class Trait>
    typename Trait::value_t pack_xfloat() const
    {
        union {
            typename Trait::holder_t i;
            typename Trait::value_t f;
        } uu;
        uu.i = pack_xfloati<Trait>();
        return uu.f;
    }
    /// Packs into the float type T using the trait registered in float2trait.
    template <class T>
    constexpr T pack_float() const
    {
        return pack_xfloat<typename float2trait<T>::trait>();
    }
    constexpr bool isInfinity() const { return type == Infinity; }
    constexpr bool isRegular() const { return type == Regular; }
    constexpr bool isNaN() const { return type == NaN; }
    constexpr bool isZero() const { return type == Zero; }
    constexpr bool isPositive() const { return !negativeSign; }
    static constexpr Unpacked infinity() { return Unpacked(Infinity); }
    static constexpr Unpacked pinfinity() { return Unpacked(Infinity, false); }
    static constexpr Unpacked ninfinity() { return Unpacked(Infinity, true); }
    static constexpr Unpacked nan() { return Unpacked(NaN); }
    static constexpr Unpacked one() { return Unpacked(0, 0, false); }
    static constexpr Unpacked zero() { return Unpacked(Zero); }
    template <class Trait>
    static constexpr Unpacked make_fixed(typename Trait::value_t x)
    {
        return Unpacked().unpack_xfixed<Trait>(x);
    }
    template <class Trait>
    static constexpr Unpacked make_floati(typename Trait::holder_t x)
    {
        return Unpacked().unpack_xfloati<Trait>(x);
    }
    template <class Trait>
    static constexpr Unpacked make_float(typename Trait::value_t x)
    {
        return Unpacked().unpack_xfloat<Trait>(x);
    }
    /// Equality: NaN never compares equal; otherwise sign and class must match
    /// and, for Regular values, exponent and fraction as well.
    constexpr bool operator==(const Unpacked &u) const
    {
        // nan != nan ALWAYS
        return type == NaN || u.type == NaN
                   ? false
                   : negativeSign == u.negativeSign && type == u.type &&
                         (type == Regular
                              ? (exponent == u.exponent && fraction == u.fraction)
                              : true);
    }
    /// Inequality: the exact negation of operator== (and therefore true
    /// whenever either operand is NaN).
    constexpr bool operator!=(const Unpacked &u) const
    {
        return !(*this == u);
    }
    /// Negation: flips the sign and keeps the class, so that infinities (and
    /// the other special values) are not turned into Regular values.
    constexpr Unpacked operator-() const
    {
        return type == Regular ? Unpacked(exponent, fraction, !negativeSign)
                               : Unpacked(type, !negativeSign);
    }
    /// Reciprocal. Regular values are inverted by dividing 1.0 by the mantissa
    /// in the next wider integer type; Infinity gives Zero, Zero gives
    /// Infinity, NaN is returned unchanged.
    CONSTEXPR14 Unpacked inv() const
    {
        switch (type)
        {
        case Regular:
            if (fraction == 0)
            {
                // power of two: only the exponent changes
                return Unpacked(-exponent, 0, negativeSign);
            }
            else
            {
                // one == 0,0,false
                // TODO FIX SIGN/INFINITY/NAN
                // put hidden 1. in mantiss
                POSIT_LUTYPE afrac = FT_leftmost_bit;
                POSIT_LUTYPE bfrac = FT_leftmost_bit | (fraction >> 1);
                auto exp = -exponent;
                if (afrac < bfrac)
                {
                    // quotient would be below 1: borrow one from the exponent
                    exp--;
                    bfrac >>= 1;
                }
                // the conversion back to FT drops the leading 1 of the quotient
                return Unpacked(
                    exp, (((typename nextinttype<FT>::type)afrac) << FT_bits) / bfrac,
                    negativeSign);
                // return one()/(*this);
            }
        case Infinity:
            return zero();
        case Zero:
            return infinity();
        case NaN:
        default:
            return *this;
        }
    }
    /// unpacks a value stored as fixed or integer. Value and holder match
    template <class Trait>
    CONSTEXPR14 Unpacked &unpack_xfixed(typename Trait::value_t value);
    /// unpacks a floating point value as expressed by its holding type (uint32
    /// for single)
    template <class Trait>
    CONSTEXPR14 Unpacked &unpack_xfloati(typename Trait::holder_t value);
    /// unpacks a floating point value by its value type (single)
    template <class Trait>
    Unpacked &unpack_xfloat(typename Trait::value_t value) // CANNOT be
                                                           // constexpr, except
                                                           // using the expensive
                                                           // float2bits
    {
        union {
            typename Trait::holder_t i;
            typename Trait::value_t f;
        } uu;
        uu.f = value;
        return unpack_xfloati<Trait>(uu.i);
    }
    /// Subtraction, implemented as a + (-b).
    CONSTEXPR14 friend Unpacked operator-(Unpacked a, Unpacked b)
    {
        return a + (-b);
    }
    CONSTEXPR14 Unpacked &operator+=(const Unpacked &a)
    {
        Unpacked r = *this + a;
        *this = r;
        return *this;
    }
    CONSTEXPR14 Unpacked &operator-=(const Unpacked &a)
    {
        *this += (-a);
        return *this;
    }
    /// Addition. NaN operands propagate; Infinity and Zero follow the table in
    /// the switch; Regular values are aligned to the larger exponent, combined
    /// according to their signs and renormalized.
    // TODO overflow?
    CONSTEXPR14 friend Unpacked operator+(Unpacked a, Unpacked b)
    {
        // UnpackedDualSel(*,NaN)
        // UnpackedDualSel(NaN,*)
        // whichever operand is NaN is returned, never a non-NaN operand
        if (a.isNaN())
            return a;
        if (b.isNaN())
            return b;
        switch (UnpackedDualSel(a.type, b.type))
        {
        case UnpackedDualSel(Regular, Regular):
        {
            auto dir = a.exponent - b.exponent;
            const ET exp = (dir < 0 ? b.exponent : a.exponent) + 1;
            // move right means increment exponent
            // 1.xxxx => 0.1xxxxxx
            // 1.yyyy => 0.1yyyyyy
            POSIT_LUTYPE afrac1 =
                (FT_leftmost_bit >> 1) |
                (a.fraction >> 2); // denormalized and shifted right
            POSIT_LUTYPE bfrac1 = (FT_leftmost_bit >> 1) | (b.fraction >> 2);
            // The operand with the smaller exponent is shifted right by the
            // exponent distance. A distance of FT_bits or more shifts every
            // bit out; that case is handled explicitly because such a shift
            // is undefined.
            const auto shift = dir < 0 ? -dir : dir;
            const POSIT_LUTYPE shifted =
                shift >= FT_bits
                    ? POSIT_LUTYPE(0)
                    : POSIT_LUTYPE((dir < 0 ? afrac1 : bfrac1) >> shift);
            POSIT_LUTYPE afrac = dir < 0 ? shifted : afrac1;
            POSIT_LUTYPE bfrac = dir < 0 ? bfrac1 : shifted;
            // 1.xxxx => 0.1xxxxx => 0.0k 1 xxxx
            //
            // if dir==0 then:
            //   0.1xxxxx
            //   0.1yyyyy
            //   1.zzzzzz
            //
            // but also
            //   0.1xxxx
            //   0.0001yyyy
            //   0.1zzzz
            //
            // if 1. we easily normalize by shift
            // if 0. we pre
            // mode 0: same sign, magnitudes add; otherwise the smaller
            // magnitude is subtracted from the larger, whose sign wins
            int mode =
                a.negativeSign == b.negativeSign ? 0 : afrac > bfrac ? 1 : -1;
            bool osign = mode >= 0 ? a.negativeSign : b.negativeSign;
            POSIT_LUTYPE frac = mode == 0
                                    ? afrac + bfrac
                                    : mode > 0 ? afrac - bfrac : bfrac - afrac;
            return Unpacked(exp, frac, osign).normalized(); // pass denormalized
        }
        case UnpackedDualSel(Regular, Zero):
        case UnpackedDualSel(Zero, Zero):
        case UnpackedDualSel(Infinity, Zero):
        case UnpackedDualSel(Infinity, Regular):
            return a;
        case UnpackedDualSel(Zero, Regular):
        case UnpackedDualSel(Zero, Infinity):
        case UnpackedDualSel(Regular, Infinity):
            return b;
        default: // case UnpackedDualSel(Infinity,Infinity):
            return (a.negativeSign == b.negativeSign) ? a : nan();
        }
    }
    /// Multiplication. NaN operands propagate; Zero times Infinity is NaN; any
    /// other product with Zero is Zero and with Infinity a signed Infinity;
    /// Regular values multiply their mantissas in the next wider integer type.
    // https://www.edwardrosten.com/code/fp_template.html
    // https://github.com/Melown/half
    // TODO overflow?
    CONSTEXPR14 friend Unpacked operator*(const Unpacked &a, const Unpacked &b)
    {
        // whichever operand is NaN is returned, never a non-NaN operand
        if (a.isNaN())
            return a;
        if (b.isNaN())
            return b;
        switch (UnpackedDualSel(a.type, b.type))
        {
        case UnpackedDualSel(Regular, Regular):
        {
            // put the hidden 1 back in front of both fractions
            POSIT_LUTYPE afrac = FT_leftmost_bit | (a.fraction >> 1);
            POSIT_LUTYPE bfrac = FT_leftmost_bit | (b.fraction >> 1);
            auto frac =
                ((((typename nextinttype<FT>::type)afrac) * bfrac) >> FT_bits);
#ifdef FPGAHLS
#pragma HLS RESOURCE variable = frac core = Mul_LUT
#endif
            // q: the product is below 2, its leading 1 sits one bit lower
            bool q = (frac & FT_leftmost_bit) == 0;
            auto rfrac = q ? (frac << 1) : frac;
            auto exp = a.exponent + b.exponent + (q ? 0 : 1);
#if 0
            if ((frac & FT_leftmost_bit) == 0) {
                exp--;
                frac <<= 1;
            }
#endif
            // the final shift pushes the leading 1 out (implicit bit)
            return Unpacked(exp, rfrac << 1, a.negativeSign ^ b.negativeSign);
        }
        case UnpackedDualSel(Regular, Zero):
        case UnpackedDualSel(Zero, Regular):
        case UnpackedDualSel(Zero, Zero):
            return zero();
        case UnpackedDualSel(Infinity, Zero):
        case UnpackedDualSel(Zero, Infinity):
            return nan();
        default: // case UnpackedDualSel(Infinity,Infinity):
            // inf inf or inf reg or reg inf
            return (a.negativeSign ^ b.negativeSign) ? ninfinity() : pinfinity();
        }
    }
    /**
     * Division Truth Table
     *
     *   Regular  / Regular   quotient of the mantissas
     *   Zero     / Zero      NaN
     *   Infinity / Infinity  NaN
     *   Zero     / Infinity  Zero
     *   Regular  / Infinity  Zero
     *   Zero     / Regular   Zero (a)
     *   Infinity / Zero      Infinity (a)
     *   Regular  / Zero      Infinity with the sign of a
     *   Infinity / Regular   signed Infinity
     *
     * NaN operands propagate.
     */
    // TODO overflow?
    CONSTEXPR14 friend Unpacked operator/(const Unpacked &a, const Unpacked &b)
    {
        // whichever operand is NaN is returned, never a non-NaN operand
        if (a.isNaN())
            return a;
        if (b.isNaN())
            return b;
        // 9 more cases
        switch (UnpackedDualSel(a.type, b.type))
        {
        case UnpackedDualSel(Regular, Regular):
        {
            POSIT_LUTYPE afrac = FT_leftmost_bit | (a.fraction >> 1);
            POSIT_LUTYPE bfrac1 = FT_leftmost_bit | (b.fraction >> 1);
            // when the dividend mantissa is smaller the quotient would be
            // below 1: borrow one from the exponent and halve the divisor
            auto exp = a.exponent - b.exponent + (afrac < bfrac1 ? -1 : 0);
            POSIT_LUTYPE bfrac = afrac < bfrac1 ? (bfrac1 >> 1) : bfrac1;
            /*
            if (afrac < bfrac) {
                exp--;
                bfrac >>= 1;
            }
            */
            return Unpacked(
                exp, (((typename nextinttype<FT>::type)afrac) << FT_bits) / bfrac,
                a.negativeSign ^ b.negativeSign);
        }
        case UnpackedDualSel(Zero, Zero):
        case UnpackedDualSel(Infinity, Infinity):
            return nan();
        case UnpackedDualSel(Zero, Infinity):
        case UnpackedDualSel(Regular, Infinity):
            // a finite value divided by infinity is zero
            return zero();
        case UnpackedDualSel(Zero, Regular):
        case UnpackedDualSel(Infinity, Zero):
            return a;
        case UnpackedDualSel(Regular, Zero):
            return Unpacked(Unpacked::Infinity, a.negativeSign);
        default: // case UnpackedDualSel(Infinity,Regular):
            return (a.negativeSign ^ b.negativeSign) ? ninfinity() : pinfinity();
        }
    }
#ifndef FPGAHLS
    /// Debug output: sign, decimal exponent and the fraction in hex and binary
    /// for Regular values, a fixed tag for the special values.
    friend std::ostream &operator<<(std::ostream &ons, Unpacked const &o)
    {
        switch (o.type)
        {
        case Unpacked::Regular:
            ons << "up(" << (o.negativeSign ? "-" : "+")
                << " exp (dec) = " << std::dec
                << typename printableinttype<const ET>::type(o.exponent)
                << " fraction (hex) = " << std::hex
                << typename printableinttype<const FT>::type(o.fraction)
                << " (bin) = " << std::dec
                << (std::bitset<sizeof(o.fraction) * 8>(o.fraction)) << ")";
            break;
        case Unpacked::Infinity:
            ons << (o.negativeSign ? "up(-infinity)" : "up(+infinity)");
            break;
        case Unpacked::NaN:
            ons << "up(nan)";
            break;
        case Unpacked::Zero:
            ons << "up(0)";
            break;
        }
        return ons;
    }
#endif
};
/// Loads a fixed-point integer described by Trait (totalbits, fraction_bits)
/// into this object: zero maps to the Zero type, anything else becomes a
/// Regular value whose exponent is the position of the highest set bit
/// relative to the binary point and whose fraction holds the bits below it,
/// left aligned in FT. Returns *this.
template <class FT, class ET>
template <class Trait>
CONSTEXPR14 Unpacked<FT, ET> &Unpacked<FT, ET>::unpack_xfixed(
    typename Trait::value_t nx)
{
    // TODO: handle infinity or nan in Trait
    if (nx == 0)
    {
        type = Zero;
        negativeSign = false;
        exponent = 0;
        fraction = 0;
        return *this;
    }
    using UT = typename std::make_unsigned<typename Trait::value_t>::type;
    type = Regular;
    negativeSign = nx < 0;
    UT magnitude = pcabs(nx);
    // index (from the right) of the leading one of the magnitude
    const int top = Trait::totalbits - findbitleftmostC(magnitude) - 1;
    exponent = (top - Trait::fraction_bits);
    // Move the `top` bits below the leading one to the left edge of UT.
    // top == 0 means there are none, and it also avoids shifting by the
    // full width.
    UT leftAligned = 0;
    if (top != 0)
    {
        leftAligned = (magnitude << (Trait::totalbits - top));
    }
    fraction = cast_right_to_left<UT, Trait::totalbits, FT, FT_bits>()(leftAligned);
    return *this;
}
// https://www.h-schmidt.net/FloatConverter/IEEE754.html
/// Decodes the raw bit pattern `value` of a binary floating point number
/// described by Trait (fraction_bits, exponent_bits, exponent_bias,
/// data_bits, with_denorm) into this object and returns *this.
///
/// Raw exponent all ones -> Infinity (fraction == 0) or NaN.
/// Raw exponent zero     -> Zero, or a normalized denormal when the trait
///                          supports denormals and the fraction is non-zero.
/// Anything else         -> Regular with unbiased exponent.
template <class FT, class ET>
template <class Trait>
CONSTEXPR14 Unpacked<FT, ET> &Unpacked<FT, ET>::unpack_xfloati(
    typename Trait::holder_t value)
{
    ET rawexp = bitset_getT(value, Trait::fraction_bits, Trait::exponent_bits);
    type = Regular;
    negativeSign =
        value & (((typename Trait::holder_t)1) << (Trait::data_bits - 1));
    exponent = rawexp - Trait::exponent_bias;
    // the fractional part is the low fraction_bits of the holder; re-align it
    // to the left edge of FT
    fraction = cast_right_to_left<typename Trait::holder_t, Trait::fraction_bits,
                                  FT, FT_bits>()(value);
    // stored exponent: 0, x, exponent_mask === 0, any, infinity
    // biased: -max, -max+1, ..., max, max+1 === 0, min, ..., max, infinity
    if (rawexp == ((1 << Trait::exponent_bits) - 1)) // AKA 255 raw for single
    {
        if (fraction == 0)
        {
            type = Infinity;
        }
        else
        {
            type = NaN; // don't care which
        }
    }
    else if (rawexp == 0)
    {
        if (!Trait::with_denorm || fraction == 0)
        {
            // true zero, or a denormal flushed to zero
            type = Zero;
            negativeSign = false; // don't care signed zero
        }
        else
        {
            // denormal: renormalize so that the leading one becomes implicit
            // assumes findbitleftmostC counts leading zeros within the width
            // of FT, so k < FT_bits here — TODO confirm
            int k = findbitleftmostC(fraction);
            exponent -= k;
            // Shift in two steps: k + 1 equals the width of FT when only the
            // lowest bit is set, and one shift by the full width is
            // undefined behaviour.
            fraction <<= k;
            fraction <<= 1;
        }
    }
    return *this;
}
/// Re-aligns a left-aligned fraction held in `abits` bits of AT into `bbits`
/// bits of BT. `msb` is the top bit of the source field; the bool parameter
/// selects the narrowing (true) or widening (false) implementation.
/// The primary template is intentionally empty.
template <int abits, class AT, int bbits, class BT, bool abits_gt_bbits, AT msb>
struct fraction_bit_extract
{
};
/// Narrowing case (source field wider than destination)
template <int abits, class AT, int bbits, class BT, AT msb>
struct fraction_bit_extract<abits, AT, bbits, BT, true, msb>
{
    /// Denormal form: makes the hidden one explicit (1.f -> 0.1f) and keeps
    /// the top bbits bits.
    static constexpr BT packdenorm(AT frac)
    {
        return static_cast<BT>(((frac >> 1) | msb) >> (abits - bbits));
    }
    /// Normal form: keeps the top bbits bits of the fraction.
    static constexpr BT pack(AT frac)
    {
        return bitset_getT(frac, abits - bbits, bbits);
    }
};
/// Widening case (destination field at least as wide as the source)
template <int abits, class AT, int bbits, class BT, AT msb>
struct fraction_bit_extract<abits, AT, bbits, BT, false, msb>
{
    /// Denormal form: makes the hidden one explicit, then pads on the right.
    static constexpr BT packdenorm(AT frac)
    {
        return (static_cast<BT>((frac >> 1) | msb) << (bbits - abits));
    }
    /// Normal form: pads the fraction with zeros on the right.
    static constexpr BT pack(AT frac)
    {
        return static_cast<BT>(frac) << (bbits - abits);
    }
};
/**
 * Convert (s,2**E,F) to a fixed-point integer described by Trait
 * (totalbits, fraction_bits).
 *
 * Infinity, Zero and NaN all map to 0. A regular value whose exponent does
 * not fit the integer part saturates to lowest()/max(); one whose exponent is
 * below the least significant fixed-point bit becomes 0.
 */
template <class FT, class ET>
template <class Trait>
CONSTEXPR14 typename Trait::value_t Unpacked<FT, ET>::pack_xfixed() const
{
    switch (type)
    {
    case Infinity:
    case Zero:
    case NaN:
        return 0;
    default:
        break;
    }
    constexpr int intbits = Trait::totalbits - Trait::fraction_bits;
    if (exponent >= intbits)
    {
        return negativeSign ? std::numeric_limits<typename Trait::value_t>::lowest()
                            : std::numeric_limits<typename Trait::value_t>::max();
    }
    else if (exponent < -Trait::fraction_bits)
    {
        return 0;
    }
    else
    {
        using ST = typename Trait::value_t;
        using UT = typename std::make_unsigned<ST>::type;
        // fraction 1.xxxxx from left aligned over FT to left aligned over
        // Trait::totalbits in UT
        UT f = fraction_bit_extract<FT_bits, FT, Trait::totalbits, UT,
                                    (FT_bits > Trait::totalbits),
                                    FT_leftmost_bit>::pack(fraction);
        // The fraction has to end up right below the explicit one bit placed
        // at (exponent + fraction_bits). The required right shift ranges
        // from 1 (exponent == intbits-1) to totalbits
        // (exponent == -fraction_bits). The latter equals the width of UT in
        // the common case, and a shift by the full width is undefined, so it
        // is handled explicitly: no fraction bit survives.
        const int fshift = static_cast<int>(intbits - exponent);
        const UT fbits = fshift < static_cast<int>(sizeof(UT) * 8)
                             ? static_cast<UT>(f >> fshift)
                             : static_cast<UT>(0);
        // NOTE(review): for a signed value_t, exponent == intbits-1 sets the
        // sign bit of r; saturating one step earlier may be intended — confirm.
        ST r = (ST(1) << (exponent + Trait::fraction_bits)) | (ST)fbits;
        return negativeSign ? -r : r;
    }
}
/// Encodes this value as the raw bit pattern of the binary floating point
/// format described by Trait.
///
/// Infinity/NaN/Zero map to the trait constants (zero is unsigned). A regular
/// value whose biased exponent exceeds Trait::exponent_max saturates to
/// max_h / lowest_h. A biased exponent below 1 is encoded as a denormal when
/// the trait supports denormals, otherwise it is flushed to zero.
template <class FT, class ET>
template <class Trait>
CONSTEXPR14 typename Trait::holder_t Unpacked<FT, ET>::pack_xfloati() const
{
    switch (type)
    {
    case Infinity:
        return negativeSign ? Trait::ninfinity_h : Trait::pinfinity_h;
    case Zero:
        return 0;
    case NaN:
        return Trait::nan_h;
    default:
        break;
    }
    // biased exponent, computed in a type wide enough for both ET and the
    // trait's exponent field
    largest_type<ET, typename int_least_bits<Trait::exponent_bits>::type> fexp =
        exponent;
    fexp += Trait::exponent_bias;
    typename Trait::holder_t fexpbits = 0;
    typename Trait::holder_t ffracbits = 0;
    if (fexp > Trait::exponent_max) // this is RAW exponent max
    {
        // overflow: saturate to the largest finite magnitude
        return negativeSign ? Trait::lowest_h : Trait::max_h;
    }
    else if (fexp < 1)
    {
        // Mirror of unpack_xfloati: only traits WITH denormal support encode
        // a denormal; the others flush to zero. (The test used to be
        // inverted.)
        if (!Trait::with_denorm)
            return 0;
        // Denormals have raw exponent 0 and the form 0.fraction: make the
        // hidden one explicit (1.f -> 0.1f), then shift right once more for
        // every step the biased exponent lies below 0.
        ffracbits = fraction_bit_extract<FT_bits, FT, Trait::fraction_bits,
                                         typename Trait::holder_t,
                                         (FT_bits > Trait::fraction_bits),
                                         FT_leftmost_bit>::packdenorm(fraction);
        const auto shift = -fexp; // >= 0 in this branch
        // After packdenorm the leading one sits at bit fraction_bits-1, so a
        // shift of fraction_bits or more leaves nothing. Bounding it also
        // avoids an undefined over-wide shift.
        ffracbits = shift < Trait::fraction_bits
                        ? static_cast<typename Trait::holder_t>(ffracbits >> shift)
                        : static_cast<typename Trait::holder_t>(0);
    }
    else // normal
    {
        fexpbits = ((typename Trait::holder_t)(fexp & Trait::exponent_mask))
                   << (Trait::fraction_bits);
        ffracbits = fraction_bit_extract<
            FT_bits, FT, Trait::fraction_bits, typename Trait::holder_t,
            (FT_bits > Trait::fraction_bits), FT_leftmost_bit>::pack(fraction);
    }
    typename Trait::holder_t value =
        ffracbits | fexpbits | (negativeSign ? Trait::signbit : 0);
    // don't underflow to zero? (only the sign bit is set: bump to the next
    // pattern)
    if (value != 0 && (value << 1) == 0)
    {
        value++;
    }
    return value;
}
template <class SrcTrait, class DstTrait, class FT>
constexpr typename DstTrait::holder_t convertfloats(
typename SrcTrait::holder_t src)
{
return Unpacked<typename largest_type<typename SrcTrait::holder_t,
typename DstTrait::holder_t>::type,
int>::template make_floati<SrcTrait>(src)
.template pack_xfloati<DstTrait>();
}
/**
* Emanuele Ruffaldi (C) 2017
* Templated C++ Posit
Test
using X=Posit<int32_t,4,0,uint32_t>;
X::PT::decode_posit_rs(1)
*/
#ifndef FPGAHLS
/// Reinterprets the 32 bit pattern `i` as a float through a union.
inline float uint32_to_float(uint32_t i)
{
    union Pun {
        float real;
        uint32_t bits;
    };
    Pun pun;
    pun.bits = i;
    return pun.real;
}
#endif
/// Selects which special values a posit type provides.
enum class PositSpec
{
    WithNan,   ///< the top pattern is NaN, there is no infinity
    WithInf,   ///< the top pattern is the single infinity, there is no NaN
    WithNanInf ///< the top pattern is NaN, its two neighbours are +/- infinity
};
/**
 * Compile-time constants and bit-level helpers of a posit format.
 *
 * \tparam T          signed holder type (at least totalbits wide)
 * \tparam totalbits  number of posit bits (N)
 * \tparam esbits     number of exponent bits
 * \tparam positspec_ which special values exist (NaN, infinity or both)
 */
template <class T, int totalbits, int esbits, PositSpec positspec_ >
struct PositTrait
{
    static_assert(std::is_signed<T>::value,"required signed T");
    static_assert(sizeof(T)*8 >= totalbits,"required enough storage T for provided bits SREF");
    static_assert(esbits <= totalbits-3,"esbits should be at most N-3 for the cases [x01,E] and [x10,E]");
    using POSIT_STYPE = typename std::make_signed<T>::type;
    using POSIT_UTYPE = typename std::make_unsigned<T>::type;
    static constexpr PositSpec positspec = positspec_;
    static constexpr bool withnan = positspec_ != PositSpec::WithInf;
    /// Type of the combined (regime * 2^es + exp) exponent; the next wider
    /// integer is used when totalbits+esbits reaches the width of T.
    using exponenttype = typename std::conditional<(totalbits+esbits >= sizeof(T)*8),typename nextinttype<T>::type,T>::type;

    static constexpr POSIT_UTYPE POSIT_MAXREGIME_BITS = totalbits-1;
    static constexpr POSIT_UTYPE POIST_ONEHELPER = 1;
    static constexpr POSIT_UTYPE POSIT_HOLDER_SIZE = sizeof(T)*8;
    static constexpr POSIT_UTYPE POSIT_SIZE = totalbits;
    static constexpr POSIT_UTYPE POSIT_ESP_SIZE = esbits;
    static constexpr POSIT_UTYPE POSIT_MSB = POIST_ONEHELPER<<(totalbits-1);
    static constexpr POSIT_UTYPE POSIT_HOLDER_MSB = POIST_ONEHELPER<<(POSIT_HOLDER_SIZE-1);
    static constexpr POSIT_UTYPE POSIT_MASK = ((POSIT_MSB-1)|(POSIT_MSB));
    static constexpr POSIT_UTYPE POSIT_ESP_MASK = (POIST_ONEHELPER<< esbits)-1;
    static constexpr POSIT_UTYPE POSIT_EXTRA_BITS = POSIT_HOLDER_SIZE-totalbits;
    static constexpr POSIT_UTYPE POSIT_SIGNBIT = (POIST_ONEHELPER<<(totalbits-1)); // bit
    static constexpr POSIT_UTYPE POSIT_INVERTBIT = (POIST_ONEHELPER<<(totalbits-2));
    static constexpr POSIT_STYPE POSIT_REG_SCALE = 1<<esbits;

    // these are portable ways for representing 10000000 and the two adjacent
    // numbers in the posit circle
    static constexpr POSIT_STYPE _POSIT_TOP = (POSIT_STYPE)((POSIT_UTYPE(~0) << (totalbits-1)));
    static constexpr POSIT_STYPE _POSIT_TOPRIGHT = (POSIT_STYPE)((POIST_ONEHELPER<< (totalbits-1))-1);
    static constexpr POSIT_STYPE _POSIT_TOPLEFT = (POSIT_STYPE)((POSIT_UTYPE(~0) << (totalbits-1)))+1;

    // Without NaN (classic Posit): there is only one Infinity (the top element)
    // With NaN and Inf: the top element is NaN and its neighbours are +- Infinity
    static constexpr POSIT_STYPE POSIT_PINF = positspec_ == PositSpec::WithNanInf ? _POSIT_TOPRIGHT: _POSIT_TOP ; // 1[sign] 000000 or N-1 111 bits
    static constexpr POSIT_STYPE POSIT_NINF = positspec_ == PositSpec::WithNanInf ? _POSIT_TOPLEFT: _POSIT_TOP;
    static constexpr POSIT_STYPE POSIT_NAN = _POSIT_TOP; // infinity in withnan=false otherwise it is truly nan
    static constexpr POSIT_STYPE POSIT_ONE = POSIT_INVERTBIT; // fine due to position of invert bit
    static constexpr POSIT_STYPE POSIT_MONE = -POSIT_ONE ; // minus one
    // Two
    static constexpr POSIT_STYPE POSIT_TWO = (POSIT_INVERTBIT | (POSIT_INVERTBIT>>(1+esbits)));
    // 1/2
    // 00 1[esbits+1] 0[N-3-esbits]
    // The run of esbits+1 ones is built from a one-bit mask so that it is
    // correct even when the holder is wider than totalbits. Shifting an
    // all-ones holder right by (totalbits-esbits-1) would leave
    // (holder size - totalbits) surplus ones.
    static constexpr POSIT_STYPE POSIT_HALF = POSIT_STYPE(((POIST_ONEHELPER << (esbits+1)) - 1)) << (totalbits-3-esbits);
    // max value below Infinity
    static constexpr POSIT_STYPE POSIT_MAXPOS = _POSIT_TOPRIGHT - (positspec_ == PositSpec::WithNanInf ? 1:0);
    // min value above -Infinity
    static constexpr POSIT_STYPE POSIT_MINNEG = _POSIT_TOPLEFT + (positspec_ == PositSpec::WithNanInf? 1:0);
    // minimal number above zero
    static constexpr POSIT_STYPE POSIT_AFTER0 = 1; // right to 0
    static constexpr POSIT_STYPE POSIT_BEFORE0 = -POSIT_AFTER0; // left to 0

    /// Largest combined exponent; one regime step smaller when both NaN and
    /// infinity take patterns away from the top of the range.
    static constexpr exponenttype maxexponent() { return positspec_ == PositSpec::WithNanInf ? POSIT_REG_SCALE * (totalbits - 3) : POSIT_REG_SCALE * (totalbits - 2); }
    /// Smallest combined exponent.
    static constexpr exponenttype minexponent() { return (-((exponenttype)POSIT_REG_SCALE) * (totalbits - 2)) ; }

    /**
     * Decodes the regime of a posit whose bits after the sign are left
     * aligned in T (`pars`).
     *
     * \return {regime value, number of bits taken by the regime field}.
     *   Leading bit 1: a run of `index` identical bits gives
     *   {index-1, index+1}; an all-ones word gives
     *   {POSIT_MAXREGIME_BITS-1, POSIT_MAXREGIME_BITS}.
     *   Leading bit 0: a run of `index` zeros gives {-index, index+1}; an
     *   all-zero word gives {-POSIT_MAXREGIME_BITS, POSIT_MAXREGIME_BITS}.
     *   The size is always capped at POSIT_MAXREGIME_BITS.
     */
    static CONSTEXPR14 std::pair<int,int> decode_posit_rs(T pars)
    {
        const bool x = (pars & POSIT_HOLDER_MSB) != 0; // marker bit for > 1
        // length of the leading run; findbitleftmostC must never see 0, hence
        // the explicit all-ones / all-zeros checks
        int aindex = x ? (~pars == 0 ? POSIT_MAXREGIME_BITS : findbitleftmostC((POSIT_UTYPE)~pars)) : (pars == 0 ? POSIT_MAXREGIME_BITS : findbitleftmostC((POSIT_UTYPE)pars)); // index is LAST with !x
        int index = aindex;
        int reg = x ? index-1 : -index;
        int rs = POSIT_MAXREGIME_BITS < index+1 ? POSIT_MAXREGIME_BITS : index+1; // min(POSIT_MAXREGIME_BITS, index+1)
        return {reg,rs};
    }

    /// Splits a combined exponent into {regime, exponent field}: floor
    /// division and non-negative remainder by 2^esbits, done with shift/mask.
    static constexpr std::pair<POSIT_STYPE,POSIT_UTYPE> split_reg_exp(exponenttype eexponent)
    {
        return {eexponent >> POSIT_ESP_SIZE, eexponent & POSIT_ESP_MASK };
    }

    /// Inverse of split_reg_exp.
    /// compiler note: it generates simply: shl and or
    static constexpr exponenttype join_reg_exp(POSIT_STYPE reg, POSIT_UTYPE exp)
    {
        return (((exponenttype)reg) * (1<<POSIT_ESP_SIZE))|exp;
    }
};
//template <class T, int totalbits, int esbits, PositSpec positspec_ >
//constexpr typename PositTrait<T,totalbits,esbits,withnan_>::exponenttype PositTrait<T,totalbits,esbits,withnan_>::minexponent;
// Forward declaration of the posit class template so that the free
// pack/unpack helpers below can be declared before the class body.
template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
class Posit;
// Declaration: decodes a posit into its fully unpacked form (Posit::UnpackedT).
template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
CONSTEXPR14 auto unpack_posit(const Posit<T,totalbits,esbits,FT,positspec> & p) -> typename Posit<T,totalbits,esbits,FT,positspec>::UnpackedT ;
// Declaration: encodes a fully unpacked value back into a posit.
template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
CONSTEXPR14 Posit<T,totalbits,esbits,FT,positspec> pack_posit(const typename Posit<T,totalbits,esbits,FT,positspec>::UnpackedT & x);
/**
 * Minimal unpacked representation of a Posit: the posit fields decoded but
 * not yet merged into a single exponent.
 * UT is UnpackedT (provides the Type enumeration)
 * PT is the Trait (provides POSIT_STYPE / POSIT_UTYPE)
 */
template <class UT, class PT>
struct UnpackedLow_t
{
    using Type = typename UT::Type;
    // The mem-initializers are listed in member declaration order, which is
    // the order in which they are executed.
    /// Special value of kind t, positive, all numeric fields zero
    constexpr UnpackedLow_t(Type t): type(t), negativeSign(false), regime(0), exp(0), fraction(0) {}
    /// Special value of kind t with explicit sign, all numeric fields zero
    constexpr UnpackedLow_t(Type t, bool anegativeSign): type(t), negativeSign(anegativeSign), regime(0), exp(0), fraction(0) {}
    /// Regular value from sign n, regime r, exponent field e and fraction f
    constexpr UnpackedLow_t(bool n, typename PT::POSIT_STYPE r, typename PT::POSIT_UTYPE e, typename PT::POSIT_UTYPE f):
        type(UT::Regular), negativeSign(n), regime(r), exp(e), fraction(f) {}
    Type type;
    bool negativeSign; // for Regular and Infinity if applicable
    typename PT::POSIT_STYPE regime; // decoded with sign
    typename PT::POSIT_UTYPE exp; // decoded
    typename PT::POSIT_UTYPE fraction; // fraction left aligned without 1.
};
/**
 * Posit number. The value is kept as a signed integer `v` of type T holding
 * the posit pattern right aligned and sign extended (two's complement).
 * Uses esbits bits of exponent.
 *
 * Notes:
 * \tparam T is the holding type that has to be signed due to the two's complement sign method
 * \tparam totalbits is the number of significant bits of the posit stored in T right aligned. Due to the two's complement scheme the MSB bits are an extension of the sign
 * \tparam esbits is the size of the exponent
 * \tparam FT is the unsigned type holding the fraction with the 1 explicitly specified
 * \tparam positspec selects which special values (NaN, infinity or both) exist
 *
 */
template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
class Posit
{
public:
    using PT=PositTrait<T,totalbits,esbits,positspec>;
    static_assert(std::is_unsigned<FT>::value,"required unsigned FT");
    enum { vtotalbits = totalbits, vesbits = esbits};
    /// Tag selecting the constructor that takes the raw holder value
    struct DeepInit{};
    static constexpr bool withnan = positspec != PositSpec::WithInf;
    using value_t=T;
    using fraction_t=FT;
    using UnpackedT=Unpacked<FT,typename PT::exponenttype>;
    using UnpackedLow = UnpackedLow_t<UnpackedT,PT>;
    using exponenttype = typename PT::exponenttype;
    T v; // index in the N2 space

    /// Deferred product a*b: keeps both factors so that a following addition
    /// can be evaluated on the unpacked values with a single final packing.
    struct PositMul
    {
        constexpr PositMul(Posit av, Posit bv) : a(av),b(bv) {}
        Posit a,b;
        /// Evaluates the product and packs it
        constexpr Posit asPosit() const { return pack_posit<T,totalbits,esbits,FT,positspec>(a.unpack()*b.unpack()); }
        constexpr operator Posit() const { return asPosit(); }
        constexpr operator UnpackedT() const { return asPosit(); }
#ifndef FPGAHLS
        constexpr operator float() const { return asPosit(); }
        constexpr operator double() const { return asPosit(); }
#endif
        constexpr operator int() const { return asPosit(); }
        // pa.a*pa.b+pb.a*pb.b =>
        friend constexpr Posit operator+(const PositMul & pa, const PositMul & pb)
        {
            return pack_posit<T,totalbits,esbits,FT,positspec>(pa.a.unpack()*pa.b.unpack()+pb.a.unpack()*pb.b.unpack());
        }
        // missing operators
        // &
        // -
    };

    CONSTEXPR14 Posit half() const;
    CONSTEXPR14 Posit twice() const;
    CONSTEXPR14 UnpackedLow unpack_low() const;
    static CONSTEXPR14 Posit pack_low(UnpackedLow);
    static CONSTEXPR14 UnpackedT unpacked_low2full(UnpackedLow x);
    static CONSTEXPR14 UnpackedLow unpacked_full2low(UnpackedT tx);

    /// diagnostics with full details
    struct info
    {
        bool infinity = false;
        bool nan = false;
        int k = 0;
        bool sign = false; // true if negative
        double regime = 0;
        int rs = 0;
        int es = 0;
        int fs = 0;
        int exp = 0;
        int exponent = 0;
        FT ifraction = 0;
        double fraction = 0;
        double value = 0;
    };
    info analyze();

    // Comparisons act on the holder value; any comparison involving NaN is
    // false (so != is true).
    friend constexpr bool operator == (const Posit & a, const Posit & u) { return withnan && (a.isNaN()||u.isNaN())?false :a.v == u.v; }
    friend constexpr bool operator != (const Posit & a, const Posit & u) { return !(a == u); }
    friend constexpr bool operator < (const Posit & a, const Posit & u) { return withnan && (a.isNaN()||u.isNaN())?false :a.v < u.v; }
    friend constexpr bool operator <= (const Posit & a, const Posit & u) { return withnan && (a.isNaN()||u.isNaN())?false :a.v <= u.v; }
    friend constexpr bool operator > (const Posit & a, const Posit & u) { return withnan && (a.isNaN()||u.isNaN())?false :a.v > u.v; }
    friend constexpr bool operator >= (const Posit & a, const Posit & u) { return withnan && (a.isNaN()||u.isNaN())?false :a.v >= u.v; }

    static constexpr Posit ldexp(const Posit & u, int exp); // exponent product

    using single_tag = typename UnpackedT::single_tag;

    /// zero
    constexpr Posit() : v(0) {}
    CONSTEXPR14 explicit Posit(single_tag t, uint32_t p) { v = pack_posit<T,totalbits,esbits,FT,positspec>(UnpackedT(t,p)).v; }
    /// construct passing the holding type x
    CONSTEXPR14 explicit Posit(DeepInit, T x) : v(x) {}
    /// construct from decomposed (s, R,E,F)
    CONSTEXPR14 explicit Posit(UnpackedLow u) : v(pack_low(u).v) {}
    /// construct from fully unpacked floating (s,e,F)
    CONSTEXPR14 explicit Posit(UnpackedT u) : v(pack_posit<T,totalbits,esbits,FT,positspec>(u).v) {}
#ifndef FPGAHLS
    CONSTEXPR14 explicit Posit(float f): Posit(UnpackedT(f)) {}
    CONSTEXPR14 explicit Posit(double d): Posit(UnpackedT(d)) {}
#endif
    CONSTEXPR14 Posit(int i): Posit(UnpackedT(i)) {}

    constexpr UnpackedT unpack() const { return unpack_posit<T,totalbits,esbits,FT,positspec>(*this); }
    /// absolute value
    /// TODO: use (v ^ mask) - mask OR (x+mask)^mask
    /// where int const mask = v >> sizeof(int) * CHAR_BIT - 1;
    constexpr Posit abs() const { return Posit(DeepInit(),pcabs(v)); } // could be >= infinity because infinity is sign symmetric
    /// negation
    constexpr Posit neg() const { return Posit(DeepInit(),-v); }
    /// 1/x
    CONSTEXPR14 Posit inv() const;
    // SFINAE optionally: template<typename U = T, class = typename std::enable_if<withnan, U>::type>
    constexpr bool hasNaN() const { return positspec != PositSpec::WithInf; }
    constexpr bool isNaN() const { return positspec != PositSpec::WithInf && v == PT::POSIT_NAN; }
    constexpr bool isnegative() const { return v < 0; } //(v &POSIT_SIGNBIT) != 0; }
    constexpr bool isinfinity() const { return positspec != PositSpec::WithNan && (v == PT::POSIT_PINF || v == PT::POSIT_NINF); }
    constexpr bool iszero() const { return v == 0; }
    constexpr bool isone() const { return v == PT::POSIT_ONE; }
    /// Previous representable value; lowest() and every pattern outside
    /// [lowest(), max()] (infinity, NaN) are returned unchanged.
    constexpr Posit prev() const { return Posit(DeepInit(),v > PT::POSIT_MAXPOS || v <= PT::POSIT_MINNEG ? v : v-1); }
    /// Next representable value; max() and every pattern outside
    /// [lowest(), max()] (infinity, NaN) are returned unchanged.
    /// This is the mirror image of prev(): the previous condition was a copy
    /// of prev()'s, so next(lowest()) did not advance and next(max()) stepped
    /// onto the infinity/NaN pattern.
    constexpr Posit next() const { return Posit(DeepInit(),v >= PT::POSIT_MAXPOS || v < PT::POSIT_MINNEG ? v : v+1); }
    //TBDconstexpr bool isNaN() const;
    //TBD constexpr bool isexact() const { return (v&1) == 0; }
    //TBD constexpr bool isfractional() const { return v > 0 && (abs().v < (N2>>2)); } // (0 < x < 1) or (-1 < x < 0) == (-1,1) removing 0
    //TBD constexpr bool isstrictlynegative() const { return v > (N2>>1); } // -inf < x < 0

    // Level 1: unpacked
    // Level 0: something using posit specialties
    /// Multiplication is deferred: the result converts to Posit on demand
    friend constexpr PositMul operator*(const Posit & a, const Posit & b)
    {
        return PositMul(a,b);
    }
    friend constexpr Posit operator+(const Posit & a, const PositMul & b)
    {
        return fma(b.a,b.b,a);
    }
    friend constexpr Posit operator+(const PositMul & a, const Posit & b)
    {
        return fma(a.a,a.b,b);
    }
    /// a*b+c evaluated on the unpacked values with a single final packing
    friend constexpr Posit fma(const Posit & a, const Posit & b, const Posit & c)
    {
        return pack_posit<T,totalbits,esbits,FT,positspec>(a.unpack()*b.unpack()+c.unpack());
    }
    CONSTEXPR14 Posit & operator*= (const Posit & b)
    {
        *this = pack_posit<T,totalbits,esbits,FT,positspec>(unpack()*b.unpack());
        return *this;
    }
    /// Sum; a zero operand short-circuits to the other operand
    friend constexpr Posit operator+(const Posit & a, const Posit & b)
    {
        return a.iszero() ? b : b.iszero() ? a: pack_posit<T,totalbits,esbits,FT,positspec>(a.unpack()+b.unpack());
    }
    Posit& operator+=(const Posit &a) { Posit r = *this+a; v = r.v; return *this; }

    // named constants
    static constexpr Posit zero() { return Posit(DeepInit(),0); }
    static constexpr Posit inf() { return Posit(DeepInit(),PT::POSIT_PINF); }
    static constexpr Posit pinf() { return Posit(DeepInit(),PT::POSIT_PINF); }
    static constexpr Posit ninf() { return Posit(DeepInit(),PT::POSIT_NINF); }
    static constexpr Posit max() { return Posit(DeepInit(),PT::POSIT_MAXPOS); }
    static constexpr Posit min() { return Posit(DeepInit(),PT::POSIT_AFTER0); }
    static constexpr Posit lowest() { return Posit(DeepInit(),PT::POSIT_MINNEG); }
    // SFINAE optionally: template<typename U = T, class = typename std::enable_if<withnan, U>::type>
    static constexpr Posit nan() { return Posit(DeepInit(),PT::POSIT_NAN); }
    static constexpr Posit infinity() { return Posit(DeepInit(),PT::POSIT_PINF); }
    static constexpr Posit one() { return Posit(DeepInit(),PT::POSIT_ONE); }
    static constexpr Posit two() { return Posit(DeepInit(),PT::POSIT_TWO); }
    static constexpr Posit mone() { return Posit(DeepInit(),PT::POSIT_MONE); }
    static constexpr Posit onehalf() { return Posit(DeepInit(),PT::POSIT_HALF); }

    // custom operators
    constexpr Posit operator-() const { return neg(); }
    /// NOTE: operator~ is the reciprocal, not a bitwise complement
    constexpr Posit operator~() const { return inv(); }
    friend constexpr Posit operator-(const Posit & a, const Posit & b) { return a + (-b); }
    friend constexpr Posit operator/(const Posit & a, const Posit & b) { return pack_posit< T,totalbits,esbits,FT,positspec> (a.unpack()/b.unpack()); }
    /*
    void setBits(POSIT_UTYPE bits)
    {
    if(bits & POSIT_SIGNBIT)
    {
    v = ((~0) << POSIT_SIZE) | bits;
    }
    else
    {
    v = bits;
    }
    }
    */
    // slowproduct
    // slowsum
    // exactvalue
    // iostream
    // sqrt
    // exp
    // conversion
    // max
    /// Raw IEEE single precision bit pattern of this value
    constexpr uint32_t as_float_bin() const { return unpack().template pack_xfloati<single_trait>(); }
    constexpr operator UnpackedT() const { return unpack(); }
#ifndef FPGAHLS
    constexpr operator float() const { return unpack(); }
    constexpr operator double() const { return unpack(); }
#endif
    constexpr operator int() const { return unpack(); }
    /// 1/(exp(-x)+1)
    /// TODO: infinity check + __round of result
    constexpr Posit pseudosigmoid() const { return Posit(DeepInit(),(v ^ PT::POSIT_SIGNBIT) >> 2); }
    /// ln(1+exp(x))
    constexpr Posit pseudosoftplus() const { return Posit(DeepInit(),(v ^ PT::POSIT_SIGNBIT) >> 1); }
    /// returns true if in [0,1]
    constexpr bool isUnitRange() const { return v >= 0 && v <= PT::POSIT_ONE; }
    /// unitary range 1-x
    constexpr Posit urOneMinus() const { return Posit(DeepInit(),PT::POSIT_INVERTBIT-v); }
    /// unitary range x(1-x)
    constexpr Posit urDeltaPs() const { return (*this)*urOneMinus(); }

    /// Wrapper around the raw holder value meant for a detailed stream dump
    struct FullWriter
    {
        FullWriter(T x): ax(x) {}
        T ax;
    };
    FullWriter describe() const { return FullWriter(v); }
#ifndef FPGAHLS
    /// Placeholder: currently writes nothing
    friend std::ostream & operator << (std::ostream &ons, const FullWriter & w)
    {
        return ons;
    }
#endif
};
#if 0
// Disabled draft of an out-of-class Posit(int) constructor that packs the
// value directly instead of going through UnpackedT. It is excluded from
// compilation by the surrounding #if 0.
// NOTE(review): as written it would not compile if enabled: `rawexp` and `PP`
// are used without a declaration in this function, and a constructor cannot
// return a value.
//template <class T,int totalbits, int esbits, class FT, PositSpec positspec, class Trait>
template <class T, int totalbits, int esbits, class FT, PositSpec positspec>
CONSTEXPR14 Posit<T,totalbits,esbits,FT,positspec>::Posit(int xvalue)
{
using Trait=PT;
using POSIT_UTYPE = typename PT::POSIT_UTYPE;
using POSIT_STYPE = typename PT::POSIT_STYPE;
using UT=Unpacked<FT,typename PT::exponenttype>;
if(xvalue == 0)
{
v = 0;
}
else
{
bool negativeSign = xvalue < 0;
int value = xvalue < 0 ? -xvalue: xvalue;
auto exponentF = rawexp - Trait::exponent_bias; // ((un.u >> Trait::fraction_bits) & Trait::exponent_mask)
auto fractionF = cast_right_to_left<typename Trait::holder_t,Trait::fraction_bits,FT,UT::FT_bits>()(value);
if(rawexp == ((1 << Trait::exponent_bits)-1))
{
if(fractionF == 0)
{
return PP(typename PP::DeepInit(),negativeSign ? PT::POSIT_NINF : PT::POSIT_PINF);
}
else
{
return PP(typename PP::DeepInit(),PT::POSIT_NAN);
}
}
else if (rawexp == 0)
{
if(fractionF == 0)
{
negativeSign = false;
return PP::zero();
}
else
{
int k = findbitleftmostC(fractionF);
exponentF -= k;
fractionF <<= k+1; // plus normalization
}
}
// Phase 3: compute low as regime (Unpacked_Low)
auto eexponent = clamp<decltype(exponentF)>(exponentF,PT::minexponent,PT::maxexponent); // no overflow
auto rr = PT::split_reg_exp(exponentF);
auto fraction = cast_msb<FT,sizeof(FT)*8,typename PT::POSIT_UTYPE,sizeof(typename PT::POSIT_UTYPE)*8>()(fractionF);
auto reg = rr.first;
auto exp = rr.second;
// Phase 4: UnpackedLow to Posit
auto rs = std::max(-reg + 1, reg + 2);
auto es = std::min((int)(totalbits-rs-1),(int)esbits);
POSIT_UTYPE regbits = reg < 0 ? (PT::POSIT_HOLDER_MSB >> -reg) : (PT::POSIT_MASK << (PT::POSIT_HOLDER_SIZE-(reg+1))); // reg+1 bits on the left
POSIT_UTYPE eexp = msb_exp<POSIT_UTYPE,PT::POSIT_HOLDER_SIZE,esbits,(esbits == 00)>()(exp);
POSIT_STYPE p = ((fraction >> (rs+es+1)) | (eexp >> (rs+1)) | (regbits>>1)) >> (sizeof(PP)*8-totalbits);
return PP(typename PP::DeepInit(),negativeSign ? -p : p);
}
}
#endif
#ifndef FPGAHLS
/// Streams a posit by printing its unpacked representation.
template <class T, int totalbits, int esbits, class FT, PositSpec positspec>
std::ostream & operator << (std::ostream & ons, Posit<T,totalbits,esbits,FT,positspec> const & o)
{
    return ons << o.unpack();
}
#endif
/// Reciprocal 1/x.
/// Level 1: -exponent of unpacked
/// Level 0: flip bits of rs
///
/// Special values: 1/0 is +infinity (NaN for a format without infinity, as
/// decided by pack_low), 1/infinity is zero and NaN stays NaN. Exact powers
/// of two (fraction == 0) are inverted directly on the regime/exponent
/// fields; everything else goes through the fully unpacked form.
template <class T, int totalbits, int esbits, class FT, PositSpec positspec>
CONSTEXPR14 auto Posit<T,totalbits,esbits,FT,positspec>::inv() const -> Posit
{
    auto u = unpack_low();
    // Zero/Infinity/NaN are unpacked with fraction == 0 and exp == 0, so they
    // would otherwise take the power-of-two path below and come back
    // unchanged (inv(0) == 0, inv(inf) == inf).
    switch(u.type)
    {
    case UnpackedT::Zero:
        return pack_low(UnpackedLow(UnpackedT::Infinity, false));
    case UnpackedT::Infinity:
        return pack_low(UnpackedLow(UnpackedT::Zero));
    case UnpackedT::NaN:
        return pack_low(u);
    default:
        break;
    }
    if(u.fraction == 0)
    {
        // 2^(R scale + E) with E unsigned
        // becomes -R scale + (scale-E)
        if(u.exp == 0)
            u.regime = -u.regime;
        else
        {
            u.regime = -(u.regime+1);
            u.exp = PT::POSIT_REG_SCALE-u.exp; // NOP if esbits == 0
        }
        return pack_low(u);
    }
    else
    {
        return pack_posit< T,totalbits,esbits,FT,positspec> (unpacked_low2full(u).inv());
    }
}
/// Free-function spelling of negation.
template <class T, int totalbits, int esbits, class FT, PositSpec positspec>
constexpr Posit<T,totalbits,esbits,FT,positspec> neg(Posit<T,totalbits,esbits,FT,positspec> x)
{
    return x.neg();
}
/// Free-function spelling of the reciprocal.
template <class T, int totalbits, int esbits, class FT, PositSpec positspec>
constexpr Posit<T,totalbits,esbits,FT,positspec> inv(Posit<T,totalbits,esbits,FT,positspec> x)
{
    return x.inv();
}
/// Functor moving an ebits wide exponent field to the most significant bits
/// of an hbits wide word. The bool parameter selects the "no exponent bits"
/// variant; the primary template is intentionally empty.
template <class T, int hbits,int ebits, bool zeroes>
struct msb_exp
{
};
/// No exponent bits: the result is always zero (and no shift by the full
/// word width is ever formed).
template <class T, int hbits,int ebits>
struct msb_exp<T,hbits,ebits,true>
{
    constexpr T operator()(T) const
    {
        return static_cast<T>(0);
    }
};
/// General case: shift the field up to the top of the word.
template <class T, int hbits,int ebits>
struct msb_exp<T,hbits,ebits,false>
{
    constexpr T operator()(T field) const
    {
        return static_cast<T>(field << (hbits-ebits));
    }
};
/// Decodes the posit into its fields (sign, regime, exponent field and left
/// aligned fraction) without merging regime and exponent. Infinity, NaN and
/// zero are reported through the type tag only.
template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
CONSTEXPR14 auto Posit<T,totalbits,esbits,FT,positspec>::unpack_low() const -> UnpackedLow
{
    using POSIT_UTYPE = typename PT::POSIT_UTYPE;
    // special values first
    if(isinfinity())
    {
        return UnpackedLow(UnpackedT::Infinity, v < 0);
    }
    if(isNaN())
    {
        return UnpackedLow(UnpackedT::NaN);
    }
    if(v == 0)
    {
        return UnpackedLow(UnpackedT::Zero);
    }
    // regular value: work on the magnitude, remember the sign
    bool isNegative = (v & PT::POSIT_SIGNBIT) != 0;
    T magnitude = isNegative ? -v : v;
    // drop the holder padding and the sign bit -> MSB: RS ES FS
    POSIT_UTYPE regimeFirst = magnitude << (PT::POSIT_EXTRA_BITS+1);
    auto decoded = PT::decode_posit_rs(regimeFirst);
    int regimeValue = decoded.first;
    int regimeSize = decoded.second;
    // drop the regime -> MSB: ES FS
    POSIT_UTYPE expFirst = regimeFirst << regimeSize;
    POSIT_UTYPE expField = bitset_leftmost_get_const<T,esbits>()(expFirst);
    // drop the exponent -> MSB: FS, i.e. the fraction left aligned in T
    POSIT_UTYPE fracFirst = expFirst << esbits;
    return UnpackedLow(isNegative,regimeValue,expField,fracFirst);
}
// Encodes a field-wise unpacked value (sign, regime, exponent field, left
// aligned fraction) into a posit.
// Special types map to the format's constants; when the format lacks the
// requested special value the other one is substituted (see the two cases
// below). For a regular value the regime, exponent and fraction fields are
// assembled left aligned in the holder, one bit is left free for the sign,
// the word is moved down to the low totalbits and finally negated if needed.
template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
CONSTEXPR14 auto Posit<T,totalbits,esbits,FT,positspec>::pack_low(UnpackedLow x) -> Posit
{
using PP=Posit<T,totalbits,esbits,FT,positspec>;
using PT=typename Posit<T,totalbits,esbits,FT,positspec>::PT;
using POSIT_UTYPE = typename PT::POSIT_UTYPE;
using POSIT_STYPE = typename PT::POSIT_STYPE;
switch(x.type)
{
case UnpackedT::Infinity:
// if infinity is missing return nan
return positspec != PositSpec::WithNan ? (x.negativeSign ? PP::ninf(): PP::pinf()): PP::nan();
case UnpackedT::Zero:
return PP(typename PP::DeepInit(),0);
case UnpackedT::NaN:
// if nan is missing return infinity
return positspec != PositSpec::WithInf ? PP::nan() : PP::pinf();
default:
break;
}
auto exp = x.exp;
auto reg = x.regime;
// regime field size, including the terminating bit:
// for reg>=0: reg+1 leading bits plus terminator => size is reg+2
// for reg <0: -reg leading bits plus terminator => size is -reg+1
auto rs = -reg+1 > reg+2 ? -reg+1:reg+2; //std::max(-reg + 1, reg + 2); MSVC issue
// exponent bits that still fit after sign and regime (capped at esbits)
auto es = (totalbits-rs-1) < esbits ? (totalbits-rs-1): esbits; //std::min((int)(totalbits-rs-1),(int)esbits); MSVC issue
// regime pattern left aligned in the holder (the sign slot is added later by >>1)
POSIT_UTYPE regbits = reg < 0 ? (PT::POSIT_HOLDER_MSB >> -reg) : (PT::POSIT_MASK << (PT::POSIT_HOLDER_SIZE-(reg+1))); // reg+1 bits on the left
// exponent field left aligned in the holder (0 when esbits == 0)
POSIT_UTYPE eexp = msb_exp<POSIT_UTYPE,PT::POSIT_HOLDER_SIZE,esbits,(esbits == 00)>()(exp);
POSIT_UTYPE fraction = x.fraction;
// place fraction after sign+regime+exponent, exponent after sign+regime, regime after
// the sign, then right align the totalbits wide result in the holder
POSIT_STYPE p = ((fraction >> (rs+es+1)) | (eexp >> (rs+1)) | (regbits>>1)) >> (sizeof(T)*8-totalbits);
//std::cout << "incoming " << x << std::endl;
//std::cout << "fraction before " << std::bitset<sizeof(FT)*8>(x.fraction) << " and " << " after " << std::bitset<sizeof(POSIT_UTYPE)*8>(fraction) << " residual exponent " << exp << " from " << eexponent << " and regime " << reg << std::endl;
//std::cout << "output sign " << std::bitset<sizeof(T)*8>(p) << " then " << std::bitset<sizeof(T)*8>(-p) << std::endl;
// the sign is applied by two's complement negation of the whole pattern
return PP(typename PP::DeepInit(),x.negativeSign ? -p : p);
}
/**
 * Returns this posit divided by two.
 *
 * A regular value +-2^(regime*POSIT_REG_SCALE + exp) * 1.f is halved by
 * lowering the scale by one step and leaving the fraction untouched:
 *  - exp != 0: decrement exp
 *  - exp == 0: borrow from the regime (regime-1) and set exp to
 *              POSIT_REG_SCALE-1
 * Non-regular values are returned unchanged.
 *
 * NOTE(review): the regime is decremented without a range check here; confirm
 * the result for the smallest representable magnitude.
 */
template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
CONSTEXPR14 auto Posit<T,totalbits,esbits,FT,positspec>::half() const -> Posit<T,totalbits,esbits,FT,positspec>
{
	UnpackedLow parts = unpack_low();
	if(parts.type != UnpackedT::Regular)
	{
		return *this;
	}

	if(parts.exp != 0)
	{
		parts.exp--;
	}
	else
	{
		parts.regime--;
		parts.exp = PT::POSIT_REG_SCALE-1; // largest exponent field value
	}
	return pack_low(parts);
}
/**
 * Returns this posit multiplied by two.
 *
 * A regular value +-2^(regime*POSIT_REG_SCALE + exp) * 1.f is doubled by
 * raising the scale by one step and leaving the fraction untouched:
 *  - exp != POSIT_REG_SCALE-1: increment exp
 *  - exp == POSIT_REG_SCALE-1: carry into the regime (regime+1) and reset
 *                              exp to 0
 * Non-regular values are returned unchanged.
 *
 * NOTE(review): the regime is incremented without a range check here; confirm
 * the result for the largest representable magnitude.
 */
template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
CONSTEXPR14 auto Posit<T,totalbits,esbits,FT,positspec>::twice() const -> Posit<T,totalbits,esbits,FT,positspec>
{
	UnpackedLow parts = unpack_low();
	if(parts.type != UnpackedT::Regular)
	{
		return *this;
	}

	if(parts.exp != PT::POSIT_REG_SCALE-1)
	{
		parts.exp++;
	}
	else
	{
		parts.regime++;
		parts.exp = 0; // exponent field wraps to its smallest value
	}
	return pack_low(parts);
}
/**
 * Converts the low-level unpacked form (regime/exp/holder-width fraction)
 * into the full unpacked form (single exponent/FT fraction).
 *
 * Type and sign are always copied. Exponent and fraction are only filled in
 * for regular values; for every other class they keep whatever a
 * default-constructed UnpackedT holds.
 *
 * @param q low-level unpacked value
 * @return the corresponding full unpacked value
 */
template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
CONSTEXPR14 auto Posit<T,totalbits,esbits,FT,positspec>::unpacked_low2full(UnpackedLow q) -> UnpackedT
{
	using POSIT_UTYPE = typename PT::POSIT_UTYPE;
	UnpackedT full;
	full.negativeSign = q.negativeSign;
	full.type = q.type;
	if(q.type != UnpackedT::Regular)
	{
		return full;
	}
	// regime and exponent field are merged into one exponent
	full.exponent = PT::join_reg_exp(q.regime,q.exp);
	// fraction is re-aligned from the posit holder width to FT_bits
	full.fraction = cast_msb<POSIT_UTYPE,PT::POSIT_HOLDER_SIZE,FT,UnpackedT::FT_bits>()(q.fraction);
	return full;
}
/**
 * Converts the full unpacked form into the low-level form used by pack_low.
 *
 * Non-regular values only carry type and sign across. For regular values the
 * exponent is clamped to [PT::minexponent(), PT::maxexponent()], split into
 * regime and exponent field, and the fraction is re-aligned from FT to the
 * posit holder type.
 *
 * @param x full unpacked value
 * @return the corresponding low-level unpacked value
 */
template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
CONSTEXPR14 auto Posit<T,totalbits,esbits,FT,positspec>::unpacked_full2low(UnpackedT x) -> UnpackedLow
{
	if(x.type != UnpackedT::Regular)
	{
		return UnpackedLow(x.type,x.negativeSign);
	}

	// clamp first so that the split cannot leave the representable range
	auto bounded = clamp<decltype(x.exponent)>(x.exponent,PT::minexponent(),PT::maxexponent());
	auto regimeAndExp = PT::split_reg_exp(bounded);
	auto holderFraction = cast_msb<FT,sizeof(FT)*8,typename PT::POSIT_UTYPE,sizeof(typename PT::POSIT_UTYPE)*8>()(x.fraction);
	return UnpackedLow(x.negativeSign,regimeAndExp.first,regimeAndExp.second,holderFraction);
}
/**
 * Packs a full unpacked value into a posit by chaining unpacked_full2low and
 * pack_low.
 *
 * The parameter type is a nested name of the posit, so the template arguments
 * cannot be deduced from x and have to be given explicitly by the caller.
 *
 * @param x full unpacked value
 * @return the encoded posit
 */
template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
CONSTEXPR14 Posit<T,totalbits,esbits,FT,positspec> pack_posit(const typename Posit<T,totalbits,esbits,FT,positspec>::UnpackedT & x)
{
	using PP=Posit<T,totalbits,esbits,FT,positspec>;
	auto low = PP::unpacked_full2low(x);
	return PP::pack_low(low);
}
/**
 * Decodes the stored bit pattern into an info record describing the posit
 * fields (intended for diagnostics, see printinfo).
 *
 *  - infinity: sets info::infinity; the sign is only reported when the
 *              specification is WithNanInf
 *  - NaN:      sets info::nan
 *  - zero:     returns a default-constructed info
 *  - regular:  fills sign, regime (k), regime size (rs), exponent field (exp),
 *              combined exponent, field widths (es, fs) and the fraction
 *              aligned to FT (ifraction)
 *
 * @return the decoded field description
 */
template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
auto Posit<T,totalbits,esbits,FT,positspec>::analyze() -> info
{
	using UT=UnpackedT;
	using POSIT_UTYPE = typename PT::POSIT_UTYPE;
	auto pa = v;
	info i;
	if(isinfinity())
	{
		if(positspec == PositSpec::WithNanInf)
			i.sign = (pa & PT::POSIT_SIGNBIT) != 0;
		i.infinity = true;
		return i;
	}
	else if(isNaN())
	{
		i.nan = true;
		return i;
	}
	else if(v == 0)
	{
		return i;
	}
	else
	{
		i.sign = (pa & PT::POSIT_SIGNBIT) != 0;
		pa = pcabs(pa); // decode the magnitude only
		POSIT_UTYPE pars = pa << (PT::POSIT_EXTRA_BITS+1); // output MSB: RS ES FS
		auto q = PT::decode_posit_rs(pars);
		int reg = q.first;  // regime value
		int rs = q.second;  // number of bits taken by the regime field
		pars <<= rs; // MSB: ES FS
		POSIT_UTYPE exp = bitset_leftmost_getT(pars,esbits);
		pars <<= esbits; // output MSB: FS left aligned in T
		// Widen to FT *before* shifting left: shifting the holder-width value
		// by (FT_bits - POSIT_HOLDER_SIZE) can reach or exceed the width of
		// the promoted operand when FT is wider than the holder, which is
		// undefined and drops the fraction. The narrowing branch is cast so
		// that both branches share the type FT.
		i.ifraction = sizeof(FT) >= sizeof(T)
			? static_cast<FT>(pars) << (UT::FT_bits-PT::POSIT_HOLDER_SIZE)
			: static_cast<FT>(pars >> (PT::POSIT_HOLDER_SIZE-UT::FT_bits)); // output: FS left aligned in FT
		i.exponent = PT::join_reg_exp(reg,exp);
		i.exp = exp;
		i.rs = rs;
		i.k = reg;
		// exponent bits actually present: what is left after sign and regime, capped at esbits
		i.es = totalbits-rs-1 < esbits ? totalbits-rs-1 : esbits; // std::min((int)(totalbits-rs-1),(int)esbits); MSVC issue
		// fraction bits: whatever remains after sign, regime and exponent
		i.fs = totalbits-rs-i.es-1;
		return i;
	}
}
/**
 * Unpacks a posit into the full unpacked form by chaining unpack_low and
 * unpacked_low2full.
 *
 * @param p posit to decode
 * @return the full unpacked value
 */
template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
CONSTEXPR14 auto unpack_posit(const Posit<T,totalbits,esbits,FT,positspec> & p) -> typename Posit<T,totalbits,esbits,FT,positspec>::UnpackedT
{
	using PP=Posit<T,totalbits,esbits,FT,positspec>;
	auto low = p.unpack_low();
	return PP::unpacked_low2full(low);
}
#ifndef FPGAHLS
template <class X>
void printinfo(std::ostream & ons, typename X::value_t v)
{
using Q= typename printableinttype<typename X::value_t>::type;
X x(typename X::DeepInit(),v); // load the posit OK
typename X::UnpackedT u(x.unpack()); // unpack it OK
X xux(u); // pack
typename X::info ii = x.analyze();
if(ii.infinity)
ons << (X::PT::positspec == PositSpec::WithNanInf ? (ii.sign ? "posit(-infinity)" : "posit(+infinity)") : "posit(infinity)");
else if(ii.nan)
ons << "posit(nan)";
else
{
ons << " posit(" << (ii.sign ? "-" : "+") ;
ons << " rs/es/fs:" << std::dec << ii.rs << "/" << ii.es << "/" << ii.fs << " ";
ons << " k:" << std::dec << (Q)ii.k ;
ons << " exp:" << std::dec << (1<<ii.exp);
ons << " ifraction:" << std::hex << (Q)ii.ifraction;
ons << " binary:" << std::bitset<sizeof(typename X::value_t)*8>(xux.v) << ")";
}
}
#endif
#ifndef FPGAHLS
/**
 * Stream adapter: inserting a posit_formatter<T> into a std::ostream prints
 * the wrapped posit through printinfo<T> instead of its regular output.
 */
template <class T>
struct posit_formatter
{
	/// the posit to be described
	T posit;

	posit_formatter(T p): posit(p) {}

	/// forwards the raw representation of the wrapped posit to printinfo
	friend std::ostream & operator << (std::ostream & os, const posit_formatter & fmt)
	{
		printinfo<T>(os,fmt.posit.v);
		return os;
	}
};
#endif
namespace std
{
	/// Absolute value of a posit, computed on the raw representation with pcabs.
	template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
	inline CONSTEXPR14 Posit<T,totalbits,esbits,FT,positspec> abs(Posit<T,totalbits,esbits,FT,positspec> z)
	{
		using PP=Posit<T,totalbits,esbits,FT,positspec>;
		// 'typename' is required: PP is a dependent type and DeepInit names a type
		return PP(typename PP::DeepInit(),pcabs(z.v));
	}

	/// Smaller of two posits; returns a when both compare equal.
	template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
	inline CONSTEXPR14 Posit<T,totalbits,esbits,FT,positspec> min(Posit<T,totalbits,esbits,FT,positspec> a, Posit<T,totalbits,esbits,FT,positspec> b)
	{
		return a <= b ? a : b;
	}

	/// Larger of two posits; returns a when both compare equal.
	template <class T,int totalbits, int esbits, class FT, PositSpec positspec>
	inline CONSTEXPR14 Posit<T,totalbits,esbits,FT,positspec> max(Posit<T,totalbits,esbits,FT,positspec> a, Posit<T,totalbits,esbits,FT,positspec> b)
	{
		return a >= b ? a : b;
	}

	/// numeric_limits specialization for Posit; values are forwarded to the
	/// static members of the posit type and of its traits PT.
	template <class B,int totalbits, int esbits, class FT, PositSpec positspec> class numeric_limits<Posit<B,totalbits,esbits,FT,positspec> > {
	public:
		using T=Posit<B,totalbits,esbits,FT,positspec>;
		using PT=typename T::PT;
		static constexpr bool is_specialized = true;
		static constexpr T min() noexcept { return T::min(); }
		static constexpr T max() noexcept { return T::max(); }
		static constexpr T lowest() noexcept { return T::lowest (); }
		//static constexpr int digits = 0; number of digits (in radix base) in the mantissa
		//static constexpr int digits10 = 0;
		static constexpr bool is_signed = true;
		static constexpr bool is_integer = false;
		static constexpr bool is_exact = false;
		static constexpr int radix = 2;
		// distance between one and the next representable value
		static constexpr T epsilon() noexcept { return T::one().next()-T::one(); }
		//static constexpr T round_error() noexcept { return T(); }
		// this is also the maximum integer
		static constexpr int min_exponent = PT::minexponent();
		// static constexpr int min_exponent10 = 0;
		static constexpr int max_exponent = PT::maxexponent();
		//static constexpr int max_exponent10 = 0;
		static constexpr bool has_infinity = true;
		static constexpr bool has_quiet_NaN = positspec != PositSpec::WithInf;
		static constexpr bool has_signaling_NaN = false;
		//static constexpr float_denorm_style has_denorm = denorm_absent;
		static constexpr bool has_denorm_loss = false;
		static constexpr T infinity() noexcept { return T::infinity(); }
		static constexpr T quiet_NaN() noexcept { return T::nan(); }
		//static constexpr T signaling_NaN() noexcept { return T(); }
		// same value as min()
		static constexpr T denorm_min() noexcept { return T::min(); }
		static constexpr bool is_iec559 = false;
		// NOTE(review): a posit has min()/max() and a finite value set, so
		// is_bounded would normally be true - confirm before changing.
		static constexpr bool is_bounded = false;
		static constexpr bool is_modulo = false;
		static constexpr bool traps = false;
		static constexpr bool tinyness_before = false;
		//static constexpr float_round_style round_style = round_toward_zero;
		/*
		round_toward_zero, if it rounds toward zero.
		round_to_nearest, if it rounds to the nearest representable value.
		round_toward_infinity, if it rounds toward infinity.
		round_toward_neg_infinity, if it rounds toward negative infinity.
		round_indeterminate, if the rounding style is indeterminable at compile time.
		*/
	};
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.