Skip to content

Instantly share code, notes, and snippets.

@nat-n
Last active April 4, 2019 20:33
Show Gist options
  • Save nat-n/a8e3da75019b029058db3b643e4f1694 to your computer and use it in GitHub Desktop.
Save nat-n/a8e3da75019b029058db3b643e4f1694 to your computer and use it in GitHub Desktop.
Functions for encoding and decoding integers into byte strings with Variable-length quantity encoding. https://en.wikipedia.org/wiki/Variable-length_quantity
from typing import List, Tuple
def encode(number: int) -> bytes:
assert number >= 0, "Can only encode positive integers"
flag_mask = 0b10000000
value_mask = 0b01111111
bytes_buffer = [number & value_mask]
number = number >> 7
while number:
bytes_buffer.append(flag_mask | (number & value_mask))
number = number >> 7
return bytes(reversed(bytes_buffer))
def decode_first(data: bytes) -> int:
"""
Decodes the first integer in the input and ignore the rest.
"""
if not data:
return 0
value_mask = 0b01111111
result = data[0] & value_mask
if data[0] >= 128:
for byte in data[1:]:
result = (result << 7) + (byte & value_mask)
if byte < 128:
break
return result
def decode(data: bytes) -> Tuple[int, bytes]:
"""
Decodes the first integer in the input and return the rest along side the
decoded value.
"""
if not data:
return 0
value_mask = 0b01111111
result = data[0] & value_mask
decoded_length = 1
if data[0] >= 128:
for byte in data[1:]:
result = (result << 7) + (byte & value_mask)
decoded_length += 1
if byte < 128:
break
return result, data[decoded_length:]
def decode_all(data: bytes) -> List[int]:
result = []
remainder = data
while remainder:
value, remainder = decode(remainder)
result.append(value)
return result
def test_encode_and_decode_symmetry(size):
for i in range(0, size, 7):
assert decode_first(encode(i)) == i, f"Couldn't encode and decode {i}"
for i in range(0, size, 7):
assert decode(encode(i))[0] == i, f"Couldn't encode and decode {i}"
def test_encode_examples():
assert list(encode(0)) == [0]
assert list(encode(1)) == [1]
assert list(encode(127)) == [127]
assert list(encode(128)) == [129, 0]
assert list(encode(255)) == [129, 127]
assert list(encode(256)) == [130, 0]
assert list(encode(257)) == [130, 1]
assert list(encode(10000)) == [206, 16]
assert list(encode(16383)) == [255, 127]
assert list(encode(16384)) == [129, 128, 0]
assert list(encode(1234567890)) == [132, 204, 216, 133, 82]
def test_decode_examples():
assert decode([255, 0]) == (16256, [])
assert decode([1, 255, 0]) == (1, [255, 0])
assert decode([255, 0, 255, 0]) == (16256, [255, 0])
assert decode([132, 204, 216, 133, 82]) == (1234567890, [])
def test_decode_all_examples():
assert decode_all([255, 0]) == [16256]
assert decode_all([132, 204, 216, 133, 82]) == [1234567890]
assert decode_all([255, 0, 255, 0]) == [16256, 16256]
assert decode_all([0, 1, 233, 233, 100, 100]) == [0, 1, 1733860, 100]
if __name__ == "__main__":
print("Testing encode/decode symetry")
test_encode_and_decode_symmetry(1_000_000)
print("Testing encode for specific cases")
test_encode_examples()
print("Testing decode for specific cases")
test_decode_examples()
print("Testing decode_all for specific cases")
test_decode_all_examples()
print("OK")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment