Skip to content

Instantly share code, notes, and snippets.

@arnetheduck
Created March 1, 2020 16:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arnetheduck/d665107931b1ee5a29214df5f96c6ccd to your computer and use it in GitHub Desktop.
Save arnetheduck/d665107931b1ee5a29214df5f96c6ccd to your computer and use it in GitHub Desktop.
work in progress varint codec
# Little-endian base 128 variable length integer utilities, as seen in
# https://en.wikipedia.org/wiki/LEB128
#
# Used in formats like DWARF, WASM - unsigned variant identical to protobuf/go
# varint. Exception/Defect free for WASM use
import
stew/[bitops2, result]
const
# Given the truncated logarithm of a number, how many bytes do we need to
# encode it?
lengths = block:
var v: array[64, int8]
for i in 0..<64:
v[i] = int8((i + 7) div 7)
v
func leb128Len*(x: SomeUnsignedInt): int8 {.inline, raises: [].} =
## Returns number of bytes required to encode integer ``x`` as varint.
if x == 0: 1
else: lengths[log2trunc(x)]
func maxLeb128Len*(T: type): int8 {.inline, raises: [].} = leb128Len(T.high)
type
LebBuf*[T: SomeUnsignedInt] = object
data*: array[maxLeb128Len(T), byte] # len(data) <= 10
len*: int8 # >= 1 when holding valid protobuf
LebError* = enum
incomplete = "End marker not found in given stream"
overflow = "Value would overflow"
template write7(next: untyped) =
if v > type(v)(127):
result.data[result.len] = cast[byte](v) or 0x80'u8
result.len += 1
v = v shr 7
next
func toBytesLeb128*(v: SomeUnsignedInt): LebBuf[typeof(v)] {.noinit, raises: [].} =
var
v = v
# A really clever person would write a macro for this expansion, and then only
# really clever people would be able to read it.
# There is something clever about this code however: a clever compiler will
# use type information to cleverly optimze away the dead code, which is one of
# the clever reasons toBytes is generic
write7(): # 7
write7(): # 14
write7(): # 21
write7(): # 28
write7(): # 35
write7(): # 42
write7(): # 49
write7(): # 56
write7(): # 63
discard
result.data[result.len] = cast[byte](v) # high bit not set since v <= 127 at this point!
result.len += 1
template read7(shift: untyped) =
if (shift div 7) >= x.len():
return err(incomplete)
let
b = x[shift div 7]
valb = b and 0x7f'u8
val = T(valb)
vals = val shl shift
if vals shr shift != val:
return err(overflow)
res = res or vals
if b == valb: # High bit not set
return ok((res, int8((shift div 7) + 1)))
func fromBytesLeb128*(
T: typedesc[SomeUnsignedInt],
x: openArray[byte]): Result[tuple[val: T, len: int8], LebError] {.raises: [].} =
## Parse a LEB128 byte sequence and return value and how many bytes were
## parsed
var
res: T
read7(0)
read7(7)
read7(14)
read7(21)
read7(28)
read7(35)
read7(42)
read7(49)
read7(56)
read7(63)
err(incomplete)
template toOpenArray*(v: LebBuf): openArray[byte] =
toOpenArray(v.data, 0, v.len - 1)
template len*(v: LebBuf): auto = v.len
template `@`*(v: LebBuf): seq[byte] =
@v.toOpenArray()
func scanLeb128*(
T: typedesc[SomeUnsignedInt],
x: openArray[byte]): Result[int8, LebError] =
## Scan a buffer for a valid leb128-encoded value that at most fits in a
## uint64, and report how many bytes it uses
# TODO this can be done efficiently with SSE
T.fromBytesLeb128(x).map(proc(a: auto): auto = a.len)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment