Skip to content

Instantly share code, notes, and snippets.

@kg
Last active March 18, 2024 21:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kg/ad960ba4b98aebe3473938d7f74b6dc3 to your computer and use it in GitHub Desktop.
Save kg/ad960ba4b98aebe3473938d7f74b6dc3 to your computer and use it in GitHub Desktop.
#include <stdint.h>
// glib-style fixed-width integer aliases. These are real typedefs rather than
// #define text substitutions so the names are scoped, visible to debuggers and
// cannot be silently rewritten inside unrelated tokens or declarations.
typedef int32_t gint32;
typedef uint32_t guint32;
typedef uint64_t guint64;
typedef uint8_t guint8;
// 128-bit vectors (GCC/clang vector_size extension):
// v128_u1 = 16 unsigned byte lanes, v128_i4 = 4 signed 32-bit lanes.
typedef guint8 v128_u1 __attribute__ ((vector_size (16)));
typedef gint32 v128_i4 __attribute__ ((vector_size (16)));
/*
 * decode_value:
 *
 *   Decodes one variable-length, big-endian encoded integer starting at PTR,
 * using a single 16-byte load plus byte shuffles instead of per-byte loads.
 *
 * Encoding, selected by the top bits of the first byte b:
 *   0xxxxxxx              1 byte,  value = b
 *   10xxxxxx              2 bytes, value = (b & 0x3F) : ptr [1]
 *   11xxxxxx, b != 0xFF   4 bytes, value = (b & 0x1F) : ptr [1] : ptr [2] : ptr [3]
 *   0xFF                  5 bytes, value = ptr [1] : ptr [2] : ptr [3] : ptr [4]
 *
 * PTR: start of the encoded value. 16 bytes are always read from it, whatever
 *      the encoded length, so the memory must be readable that far. No
 *      particular alignment is required.
 * NEW_PTR: if non-NULL, receives the address of the first byte after the value.
 *
 * Returns: the decoded value.
 *
 * The shuffles reverse the byte order and the result is read back through
 * lane 0 of the int32 view of the vector, so this relies on a little-endian
 * target.
 */
static gint32
decode_value (guint8 *ptr, guint8 **new_ptr)
{
	// *(bytes *)ptr and *(guint32 *)ptr by themselves don't force an i32 load of
	// ptr in either x64 or wasm clang, so a vector-sized load is used to fetch all the bytes.
	// without doing this, decode_value will do 5 individual single-byte memory loads,
	// and each individual load is potentially bounds-checked. we produce one wide load.
	// we could overrun the source buffer by up to 11 bytes past the longest encoding,
	// but doing that on wasm is safe unless we're decoding from the absolute end of memory.
	// we pad all buffers by 16 bytes in mono_wasm_load_bytes_into_heap, so we're fine
	union {
		v128_u1 b;
		v128_i4 i;
	} v;
	// ptr carries no alignment guarantee, so it must not be dereferenced as a
	// v128_u1 (that type is 16-byte aligned; a misaligned dereference is undefined
	// and may be compiled to an aligned load that faults). A fixed-size 16-byte
	// copy is alignment-safe; optimizing compilers typically lower it to a
	// single unaligned vector load.
	__builtin_memcpy (&v.b, ptr, sizeof (v.b));
	gint32 result;
	// mask and shift two bits so we can have a 4-element jump table in wasm
	guint8 flags = (v.b[0] & (0x80u | 0x40u)) >> 6;
	switch (flags) {
	case 0b00u:
	case 0b01u:
		// (b & 0x80) == 0: the byte is the value
		result = v.b[0];
		++ptr;
		break;
	case 0b10u:
		// (b & 0x80) != 0, and (b & 0x40) == 0
		// v.b = { ptr[1], ptr[0], ptr[0], ptr[0] }
		v.b = __builtin_shufflevector(
			v.b, v.b,
			1, 0, 0, 0, -1, -1, -1, -1,
			-1, -1, -1, -1, -1, -1, -1, -1
		);
		// keep ptr[1] as the low byte and the low 6 bits of ptr[0] above it;
		// the mask also discards the two extra copies of ptr[0]
		result = v.i[0] & 0x3FFF;
		ptr += 2;
		break;
	case 0b11u:
	// i don't know why the default case is necessary here, but without it the jump table has 5 entries.
	default:
		// (b & 0x80) != 0, and (b & 0x40) != 0
		if (v.b[0] == 0xFFu) {
			// v.b = { ptr[4], ptr[3], ptr[2], ptr[1] }
			v.b = __builtin_shufflevector(
				v.b, v.b,
				4, 3, 2, 1, -1, -1, -1, -1,
				-1, -1, -1, -1, -1, -1, -1, -1
			);
			// the four bytes after the 0xFF marker are the full 32-bit value
			result = v.i[0];
			ptr += 5;
		} else {
			// v.b = { ptr[3], ptr[2], ptr[1], ptr[0] }
			v.b = __builtin_shufflevector(
				v.b, v.b,
				3, 2, 1, 0, -1, -1, -1, -1,
				-1, -1, -1, -1, -1, -1, -1, -1
			);
			// drop the three flag bits of ptr[0], leaving a 29-bit value
			result = v.i[0] & 0x1FFFFFFF;
			ptr += 4;
		}
		break;
	}
	if (new_ptr)
		*new_ptr = ptr;
	return result;
}
/*
 * decode_value_scalar:
 *
 *   Byte-at-a-time decoder for a variable-length, big-endian encoded integer.
 *
 * Encoding, selected by the top bits of the first byte b:
 *   (b & 0x80) == 0       1 byte,  value = b
 *   (b & 0x40) == 0       2 bytes, value = (b & 0x3f) : ptr [1]
 *   b != 0xff             4 bytes, value = (b & 0x1f) : ptr [1] : ptr [2] : ptr [3]
 *   b == 0xff             5 bytes, value = ptr [1] : ptr [2] : ptr [3] : ptr [4]
 *
 * PTR: start of the encoded value; only the bytes of the encoding are read.
 * RPTR: if non-NULL, receives the address of the first byte after the value.
 *
 * Returns: the decoded value.
 */
gint32
decode_value_scalar (guint8 *ptr, guint8 **rptr)
{
	guint8 b = *ptr;
	// Assemble in an unsigned 32-bit accumulator: a guint8 operand promotes to
	// (signed) int, and shifting a byte >= 0x80 left by 24 would shift into the
	// sign bit, which is undefined behavior.
	guint32 len;

	if ((b & 0x80) == 0) {
		len = b;
		++ptr;
	} else if ((b & 0x40) == 0) {
		len = ((guint32)(b & 0x3f) << 8) | ptr [1];
		ptr += 2;
	} else if (b != 0xff) {
		len = ((guint32)(b & 0x1f) << 24) |
			((guint32)ptr [1] << 16) |
			((guint32)ptr [2] << 8) |
			ptr [3];
		ptr += 4;
	} else {
		// 0xff is a pure marker byte; the next four bytes hold all 32 bits
		len = ((guint32)ptr [1] << 24) |
			((guint32)ptr [2] << 16) |
			((guint32)ptr [3] << 8) |
			ptr [4];
		ptr += 5;
	}
	if (rptr)
		*rptr = ptr;
	return (gint32)len;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment