Skip to content

Instantly share code, notes, and snippets.

@larseggert
Forked from mikkelfj/uf16.h
Created July 5, 2017 06:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save larseggert/7e37c9d9d45d1fa383dc5c3820c50f43 to your computer and use it in GitHub Desktop.
Save larseggert/7e37c9d9d45d1fa383dc5c3820c50f43 to your computer and use it in GitHub Desktop.
Proposed reference spec and implementation for QUIC time delta floating point format
/*
* uf16.h has been renamed ufloat16.h.
*/
/*
The MIT License (MIT)
Copyright (c) 2017 Mikkel F. Jørgensen, dvide.com
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
/*
The IETF QUIC transport protocol draft November 2016 appears to provide
an incorrect example of a delta encoded timestamp in the ACK Frame
section because 0x800 maps to 2048, not 4096 as stated. While the
format description is otherwise largely correct, it is not very easy to
follow and it doesn't suggest an easy optimization when the encoded
exponent is 1. Chromium implements this optimization as of January
2017.
The following definition seeks to describe the format more precisely so
that it is both easier to understand and to implement. It is supported
by a compact yet efficient reference implementation.
QUIC uses a custom floating point format ufloat16 to represent a
non-negative time delta in microseconds.
The ufloat16 format encodes an unsigned 64-bit integer value n as an
unsigned 16-bit floating point value v with a non-negative exponent
and with no special values like NaN (*). The decoded value k is
given as k = m * 2^p where m and p are both derived from v. m is a
12-bit significand including a hidden bit. p is a 5-bit exponent
where 0 <= p <= 30. The encoding is surjective with k <= n.
The encoded value v is represented as v = e(5)f(11) = e * 2^11 + f.
m and p are derived as: If e > 0 then p = e - 1 and m = f + 2^11.
Otherwise e = 0, p = e and m = f. All values are unsigned.
An equivalent but more efficient interpretation is given as: v =
e(5)f(11). If e >= 2 then p = e - 1, m = f + 2^11 and k = m * 2^p =
(f + 2^11) * 2^(e - 1). Otherwise e < 2 and v < 2^12 with v =
e(5)f(11) = 0(4)m(12), p = 0, m = v, and k = m * 2^p = v. All
values are unsigned.
(*) When encoding an unsigned 64-bit value n, any value that would
overflow the 16-bit encoded representation v is clamped to v =
UFLOAT16_MAX = 2^16 - 1 = 0xFFFF. A decoded value k is no greater
than UFLOAT16_UINT64_MAX = (2^12 - 1) * 2^30 = 0x3FFC0000000.
For completeness: decoding a value v returns the lower bound k of
any value n that encodes to the same value v. The encoding is a
total surjective monotonically increasing discrete function.
On common hardware that supports unsigned 64-bit two's complement
arithmetic, ufloat16 decoding can be computed as `if v < (1 <<
12) then return v; else p = (v >> 11) - 1; m = v - (p << 11); k = m
<< p; return k; endif`. Note that v - (p << 11) exposes the hidden
bit while removing the exponent. Likewise, ufloat16 encoding can
be computed as `if n < (1 << 12) then return n; else if n >=
UFLOAT16_INT64_MAX then return UFLOAT16_MAX; else p =
logbase2_floor(n >> 11); v = (p << 11) + (n >> p); return v; endif`.
Note that adding (p << 11) intentionally overflows and hides the
most significant bit and increments the exponent by one. The
`logbase2_floor` operation only needs to handle non-zero unsigned
32-bit integers because `0 < (n >> 11) < 2^32` and often has
hardware support (in C usually via __builtin_clz or
_BitScanReverse), otherwise it can be implemented efficiently in
software via 32-bit deBruijn multiplication.
*/
#ifndef UFLOAT16_H
#define UFLOAT16_H
#include <stdint.h>
#ifndef ufloat16_logbase2_floor
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
#if defined(__GNUC__) || __has_builtin(__builtin_clz)
/* Undefined for `x == 0`. */
static inline int __ufloat16_logbase2_floor_builtin(uint32_t x)
{
return 31 - __builtin_clz(x);
}
#define ufloat16_logbase2_floor(x) __ufloat16_logbase2_floor_builtin(x)
#elif defined(_MSC_VER)
#include <intrin.h>
/* Undefined for `x == 0`. */
static __inline int __ufloat16_logbase2_floor_msvc(uint32_t x)
{
unsigned long Index = 0;
_BitScanReverse(&Index, (unsigned long)x);
return (int)Index;
}
#define ufloat16_logbase2_floor(x) __ufloat16_logbase2_floor_msvc(x)
#endif
#endif /* ufloat16_logbase2_floor */
#if !defined(__cplusplus) && defined(_MSC_VER) && !defined(inline)
#define __UFLOAT16_INLINE_PATCH
#define inline __inline
#endif
/* -------------------------------------------------------- */
/* This section can be extracted for reference purposes */
/* or as a minimal fully functioning implementation. */
/* -------------------------------------------------------- */
#include <stdint.h>
#define UFLOAT16_MAX UINT16_C(0xFFFF)
#define UFLOAT16_UINT64_MAX UINT64_C(0x3FFC0000000)
/*
* Optionally provide `ufloat16_logbase2_floor` hardware support.
* It operates on unsigned 32-bit and may be undefined for 0.
*/
#ifndef ufloat16_logbase2_floor
static inline int __ufloat16_logbase2_floor_reference(uint32_t x)
{
static const uint32_t K = UINT32_C(0x07C4ACDD);
static const int deBruijn[32] =
{
0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
};
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
return deBruijn[(uint32_t)(x * K) >> 27];
}
#define ufloat16_logbase2_floor(x) __ufloat16_logbase2_floor_reference(x)
#endif /* ufloat16_logbase2_floor */
static inline uint64_t ufloat16_decode(uint16_t v)
{
uint16_t p;
uint64_t m;
if (v < (1 << 12)) {
return v;
}
p = (v >> 11) - 1;
m = v - (p << 11);
return m << p;
}
static inline uint16_t ufloat16_encode(uint64_t n)
{
uint16_t p;
if (n < (1 << 12)) {
return (uint16_t)n;
}
if (n >= UFLOAT16_UINT64_MAX) {
return UFLOAT16_MAX;
}
p = (uint16_t)ufloat16_logbase2_floor((uint32_t)(n >> 11));
return (uint16_t)(n >> p) + (p << 11);
}
/* ----------------------------------------------------- */
#ifdef __UFLOAT16_INLINE_PATCH
#undef inline
#endif
#endif /* UFLOAT16_H */
#ifdef UFLOAT16_VERIFY
/* Script to verify ufloat16 test vectors
#!/bin/sh
set -e
cp ufloat16.h ufloat16_verify.c
cc -DUFLOAT16_VERIFY -DUFLOAT16_VERIFY_MAIN -Wall -Wpedantic ufloat16_verify.c -o ufloat16_verify
./ufloat16_verify
rm ufloat16_verify.c ufloat16_verify
*/
struct __ufloat16_test_vector {
uint64_t input; /* n */
uint16_t encoded; /* v */
uint64_t decoded; /* k */
};
static struct __ufloat16_test_vector __ufloat16_test_vectors[] = {
{ 0, 0, 0 },
{ 1, 1, 1 },
{ 0x7FF, 0x7FF, 0x7FF },
{ 0x800, 0x800, 0x800 },
{ 0x801, 0x801, 0x801 },
{ 0xFFF, 0xFFF, 0xFFF },
{ 0x1000, 0x1000, 0x1000 },
{ 0x1001, 0x1000, 0x1000 },
{ 0x1002, 0x1001, 0x1002 },
{ 0x2004, 0x1801, 0x2004 },
{ 0x10000000, 0x9000, 0x10000000 },
{ 0x10080000, 0x9004, 0x10080000 },
{ 0x3FFC0000000, 0xFFFF, 0x3FFC0000000 },
{ 0x40000000000, 0xFFFF, 0x3FFC0000000 },
{ 0x7F00000000000, 0xFFFF, 0x3FFC0000000 },
{ UFLOAT16_UINT64_MAX, UFLOAT16_MAX, UFLOAT16_UINT64_MAX }
};
#include <stdlib.h>
#include <stdio.h>
static size_t __ufloat16_test_vectors_count =
sizeof(__ufloat16_test_vectors) / sizeof(struct __ufloat16_test_vector);
static int __ufloat16_verify()
{
int i, e = 0;
struct __ufloat16_test_vector tv, *TV = __ufloat16_test_vectors;
for (i = 0; i < __ufloat16_test_vectors_count; ++i) {
tv.input = TV[i].input;
tv.encoded = ufloat16_encode(tv.input);
tv.decoded = ufloat16_decode(tv.encoded);
if (tv.encoded != TV[i].encoded || tv.decoded != TV[i].decoded) {
printf( "ufloat16 test vector %d failed:\n"
" evaluated : input=%08llX, encoded=%04X, decoded=%08llX\n"
" expected : input=%08llX, encoded=%04X, decoded=%08llX\n",
i + 1,
(unsigned long long)tv.input,
(unsigned short)tv.encoded,
(unsigned long long)tv.decoded,
(unsigned long long)TV[i].input,
(unsigned short)TV[i].encoded,
(unsigned long long)TV[i].decoded);
++e;
}
}
if (e == 0) {
printf("All %d ufloat16 test vectors passed!\n", i);
}
return e != 0;
}
#ifdef UFLOAT16_VERIFY_MAIN
int main(int argv, char **argc)
{
return __ufloat16_verify();
}
#endif
#endif /* UFLOAT16_VERIFY */
#include "ufloat16.h"
/* Older and more thorough test */
/*
* To compile and run test:
*
* cc -Wall -Wpedantic ufloat16_test.c -o ufloat16_test
* ./ufloat16_test
*/
#include <stdio.h>
#define check_basic(x, s) ((x) || ((++ret) && printf("basic check at line %d failed: %s\n", __LINE__, s)))
static int verify_basic()
{
int ret = 0;
uint64_t k;
uint16_t v;
k = ufloat16_encode(0);
v = ufloat16_decode(k);
check_basic(k == 0, "k == 0");
check_basic(v == 0, "v == 0");
k = ufloat16_encode(1);
v = ufloat16_decode(k);
check_basic(k == 1, "k == 1");
check_basic(v == 1, "v == 1");
k = ufloat16_decode(2048);
v = ufloat16_encode(k);
check_basic(k == 2048, "k == 2048");
check_basic(v == 0x0800, "v == 0x0800");
k = ufloat16_decode(4096);
v = ufloat16_encode(k);
check_basic(k == 4096, "k == 4096");
check_basic(v == 0x1000, "v == 0x1000");
v = ufloat16_encode(UFLOAT16_UINT64_MAX);
k = ufloat16_decode(v);
check_basic(k == UFLOAT16_UINT64_MAX, "k == UFLOAT16_UINT64_MAX");
check_basic(v == UFLOAT16_MAX, "v == UFLOAT16_MAX");
v = ufloat16_encode(UFLOAT16_UINT64_MAX + 1);
k = ufloat16_decode(v);
check_basic(k == UFLOAT16_UINT64_MAX, "k == UFLOAT16_UINT64_MAX");
check_basic(v == UFLOAT16_MAX, "v == UFLOAT16_MAX");
v = ufloat16_encode(UINT64_MAX);
k = ufloat16_decode(v);
check_basic(k == UFLOAT16_UINT64_MAX, "k == UFLOAT16_UINT64_MAX");
check_basic(v == UFLOAT16_MAX, "v == UFLOAT16_MAX");
return ret > 0;
}
struct ufloat16_test_pair {
uint64_t k;
uint16_t v;
};
static const struct ufloat16_test_pair test_pairs[] = {
/* Small numbers represent themselves. */
{0, 0},
{1, 1},
{2, 2},
{3, 3},
{4, 4},
{5, 5},
{6, 6},
{7, 7},
{15, 15},
{31, 31},
{42, 42},
{123, 123},
{1234, 1234},
/* Check transition through 2^11. */
{2046, 2046},
{2047, 2047},
{2048, 2048},
{2049, 2049},
/* Running out of significand at 2^12. */
{4094, 4094},
{4095, 4095},
{4096, 4096},
{4097, 4096},
{4098, 4097},
{4099, 4097},
{4100, 4098},
{4101, 4098},
/* Check transition through 2^13. */
{8190, 6143},
{8191, 6143},
{8192, 6144},
{8193, 6144},
{8194, 6144},
{8195, 6144},
{8196, 6145},
{8197, 6145},
/* Half-way through the exponents. */
{0x7FF8000, 0x87FF},
{0x7FFFFFF, 0x87FF},
{0x8000000, 0x8800},
{0xFFF0000, 0x8FFF},
{0xFFFFFFF, 0x8FFF},
{0x10000000, 0x9000},
/* Transition into the largest exponent. */
{0x1FFFFFFFFFE, 0xF7FF},
{0x1FFFFFFFFFF, 0xF7FF},
{0x20000000000, 0xF800},
{0x20000000001, 0xF800},
{0x2003FFFFFFE, 0xF800},
{0x2003FFFFFFF, 0xF800},
{0x20040000000, 0xF801},
{0x20040000001, 0xF801},
/* Transition into the max value and clamping. */
{0x3FF80000000, 0xFFFE},
{0x3FFBFFFFFFF, 0xFFFE},
{0x3FFC0000000, 0xFFFF},
{0x3FFC0000001, 0xFFFF},
{0x3FFFFFFFFFF, 0xFFFF},
{0x40000000000, 0xFFFF},
{0xFFFFFFFFFFFFFFFF, 0xFFFF},
/* Symbolic constants. */
{UFLOAT16_UINT64_MAX, UFLOAT16_MAX},
};
static int verify_test_pair(int i, struct ufloat16_test_pair tp)
{
uint64_t k, k2;
uint16_t v;
v = ufloat16_encode(tp.k);
k = ufloat16_decode(tp.v);
k2 = ufloat16_decode(tp.v + 1) - 1;
if (v != tp.v || k > tp.k || k2 < tp.k) {
printf("test pair %d failed: tp.k = 0x%08llX, tp.v = 0x%04X\n",
i, (unsigned long long)tp.k, (unsigned)tp.v);
printf(" got: k = 0x%08llX, k2 = 0x%08llX, v = 0x%04X\n",
(unsigned long long)k, (unsigned long long)k2, (unsigned)v);
return 1;
}
return 0;
}
static int verify_all_test_pairs()
{
int ret = 0;
size_t i, n;
n = sizeof(test_pairs) / sizeof(test_pairs[0]);
for (i = 0; i < n; ++i) {
ret += verify_test_pair(i, test_pairs[i]);
if (ret > 9) {
printf("aborting all pairs test\n");
return 1;
}
}
return ret > 0;
}
#define check_order(i, x, s) ((x) || ((++ret) && printf("order iteration %d failed: %s\n", (int)i, s)))
static int verify_order()
{
int ret = 0;
int i;
uint64_t a, b;
uint16_t v;
a = ufloat16_encode(0);
v = ufloat16_decode(a);
check_order(0, a == 0, "a == 0");
check_order(0, v == 0, "v == 0");
for (i = 1; i <= UFLOAT16_MAX; ++i) {
b = ufloat16_decode(i);
v = ufloat16_encode(b);
check_order(i, a < b, "a < b");
check_order(i, v == i, "v == i");
if (ret > 9) {
printf("aborting ordered test\n");
return 1;
}
a = b;
}
return ret > 0;
}
int main(int argv, char **argc)
{
if (verify_basic() || verify_all_test_pairs() || verify_order()) {
return 1;
}
printf("Test passed!\n");
return 0;
}
#!/bin/sh
set -e
cp ufloat16.h ufloat16_verify.c
cc -DUFLOAT16_VERIFY -DUFLOAT16_VERIFY_MAIN -Wall -Wpedantic ufloat16_verify.c -o ufloat16_verify
./ufloat16_verify
rm ufloat16_verify.c ufloat16_verify
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment