Skip to content

Instantly share code, notes, and snippets.

@ayende
Created April 21, 2023 23:26
Show Gist options
  • Save ayende/af3c4a7acc8cc2508e0b676bc9a8eee5 to your computer and use it in GitHub Desktop.
Save ayende/af3c4a7acc8cc2508e0b676bc9a8eee5 to your computer and use it in GitHub Desktop.
using System;
using System.Collections.Generic;
using System.ComponentModel.DataAnnotations;
using System.Diagnostics;
using System.Linq;
using System.Numerics;
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using System.Text;
var Set = OffloadedNibble.Set;
var TryGetValue = OffloadedNibble.TryGetValue;
Console.WriteLine("Realistic:");
RunTest(Generate_Realistic);
Console.WriteLine("Full:");
RunTest(Generate_Full);
void RunTest(Func<IEnumerable<long>> gen)
{
Span<byte> buffer = new byte[8192];
var enumerator = gen().GetEnumerator();
var index = 0;
while (true)
{
enumerator.MoveNext();
var k = enumerator.Current;
enumerator.MoveNext();
var v = enumerator.Current;
if (Set(buffer, k, v) == false)
{
Console.WriteLine("Inserted: " + Validate(buffer, index));
break;
}
index++;
Validate(buffer, index);
}
int Validate(Span<byte> buffer, int index)
{
var enumerator = gen().GetEnumerator();
var dic = new Dictionary<long, long>();
for (int i = 0; i < index; i++)
{
enumerator.MoveNext();
var k = enumerator.Current;
enumerator.MoveNext();
var v = enumerator.Current;
dic[k] = v; // need to handle update to the same value
}
foreach (var (k, expected) in dic)
{
if (TryGetValue(buffer, k, out var actual) == false || expected != actual)
{
Console.WriteLine($"Missed: {k}! {expected} != {actual}");
break;
}
}
return dic.Count;
}
}
IEnumerable<long> Generate_Full()
{
var rand = new Random(2023_04_21);
while (true)
{
yield return rand.Next(100) switch
{
< 3 => rand.NextInt64(0, 1L << 7), // 3%
< 10 => rand.NextInt64(0, 1L << 15),// 7%
< 35 => rand.NextInt64(0, 1L << 23),// 25%
< 75 => rand.NextInt64(0, 1L << 31),// 35%
< 90 => rand.NextInt64(0, 1L << 39),// 15%
< 95 => rand.NextInt64(0, 1L << 47),// 5%
< 98 => rand.NextInt64(0, 1L << 55),// 3%
_ => rand.NextInt64(0, 1L << 62) // 2%
};
}
}
IEnumerable<long> Generate_Realistic()
{
var rand = new Random(2023_04_21);
while (true)
{
yield return rand.Next(100) switch
{
< 1 => rand.NextInt64(0, 1L << 7), // 1%
< 3 => rand.NextInt64(0, 1L << 15), // 2%
< 30 => rand.NextInt64(0, 1L << 23),// 27%
< 75 => rand.NextInt64(0, 1L << 31),// 35%
_ => rand.NextInt64(0, 1L << 39), // 25%
};
}
}
public static class Raw
{
public static bool Set(Span<byte> page, long key, long val)
{
Debug.Assert(page.Length == 8192);
Debug.Assert(page.Length == 8192);
// takes two bytes, but we reserve 8 bytes to make alignment easier
ref var numberOfItems = ref Unsafe.AsRef(MemoryMarshal.Cast<byte, ushort>(page)[0]);
// actual usable range
var entries = MemoryMarshal.CreateSpan(ref MemoryMarshal.Cast<byte, long>(page)[1], page.Length / sizeof(long) - 1);
if (numberOfItems >= entries.Length / 2)
return false; // we are full, need to split the page
var keys = entries[..numberOfItems]; // from the bottom
var values = entries[(entries.Length - numberOfItems)..]; // from the top
var idx = keys.BinarySearch(key); // makes searching easy
if (idx >= 0) // update
{
values[idx] = val;
return true;
}
idx = ~idx; // find where it *should* go
numberOfItems++;
// now extend the ranges
keys = entries[..numberOfItems];
values = entries[(entries.Length - numberOfItems)..];
// move the existing entries to make room for the new one
keys[idx..(numberOfItems - 1)].CopyTo(keys[(idx + 1)..]);
values[1..(idx + 1)].CopyTo(values[0..idx]);
keys[idx] = key;
values[idx] = val;
return true;
}
public static bool TryGetValue(Span<byte> page, long key, out long val)
{
Debug.Assert(page.Length == 8192);
// takes two bytes, but we reserve 8 bytes to make alignment easier
ref var numberOfItems = ref Unsafe.AsRef(MemoryMarshal.Cast<byte, ushort>(page)[0]);
// actual usable range
var entries = MemoryMarshal.CreateSpan(ref MemoryMarshal.Cast<byte, long>(page)[1], page.Length / sizeof(long) - 1);
var keys = entries[..numberOfItems];
var values = entries[(entries.Length - numberOfItems)..];
var idx = keys.BinarySearch(key);
if (idx >= 0) // update
{
val = values[idx];
return true;
}
val = default;
return false;
}
}
public static class VarInt
{
private static long Read7BitEncodedInt64(Span<byte> buffer, out int len)
{
// Adapted from: https://github.com/dotnet/runtime/blob/2087ceb0c70f0c13e79fbbdec90efd242e1f2cd4/src/libraries/System.Private.CoreLib/src/System/IO/BinaryReader.cs#L588
ulong result = 0;
byte byteReadJustNow;
len = 0;
// Read the integer 7 bits at a time. The high bit
// of the byte when on means to continue reading more bytes.
//
// There are two failure cases: we've read more than 10 bytes,
// or the tenth byte is about to cause integer overflow.
// This means that we can read the first 9 bytes without
// worrying about integer overflow.
const int MaxBytesWithoutOverflow = 9;
for (int shift = 0; shift < MaxBytesWithoutOverflow * 7; shift += 7)
{
// ReadByte handles end of stream cases for us.
byteReadJustNow = buffer[len++];
result |= (byteReadJustNow & 0x7Ful) << shift;
if (byteReadJustNow <= 0x7Fu)
{
return (long)result; // early exit
}
}
// Read the 10th byte. Since we already read 63 bits,
// the value of this byte must fit within 1 bit (64 - 63),
// and it must not have the high bit set.
byteReadJustNow = buffer[len++];
if (byteReadJustNow > 0b_1u)
{
throw new FormatException("Format_Bad7BitInt");
}
result |= (ulong)byteReadJustNow << (MaxBytesWithoutOverflow * 7);
return (long)result;
}
private static int Write7BitEncodedInt64(long value, Span<byte> buffer)
{
// Adapted from: https://github.com/dotnet/runtime/blob/2087ceb0c70f0c13e79fbbdec90efd242e1f2cd4/src/libraries/System.Private.CoreLib/src/System/IO/BinaryWriter.cs#L492
ulong uValue = (ulong)value;
// Write out an int 7 bits at a time. The high bit of the byte,
// when on, tells reader to continue reading more bytes.
//
// Using the constants 0x7F and ~0x7F below offers smaller
// codegen than using the constant 0x80.
int index = 0;
while (uValue > 0x7Fu)
{
buffer[index++] = ((byte)((uint)uValue | ~0x7Fu));
uValue >>= 7;
}
buffer[index++] = ((byte)uValue);
return index;
}
private static int BinarySearch(Span<byte> page, long key)
{
var offsets = OffsetsFor(page);
int lo = 0, hi = offsets.Length - 1;
while (lo <= hi)
{
int mid = (lo + hi) >> 1;
var entryOffset = offsets[mid];
var cur = Read7BitEncodedInt64(page[entryOffset..], out _);
int cmp = key.CompareTo(cur);
if (cmp == 0)
return mid;
if (cmp < 0)
lo = mid + 1;
else
hi = mid - 1;
}
// where it is *supposed* to go
return ~lo;
}
private static Span<ushort> OffsetsFor(Span<byte> page)
{
var asShorts = MemoryMarshal.Cast<byte, ushort>(page);
var bottom = asShorts[0];
var top = asShorts[1]; // unused here, included for clarity
return MemoryMarshal.CreateSpan(ref asShorts[2], (bottom - 4) / 2);
}
public static bool Set(Span<byte> page, long key, long val)
{
Debug.Assert(page.Length == 8192);
// header is 4 bytes [bottom, top]
ref var bottom = ref Unsafe.AsRef(MemoryMarshal.Cast<byte, ushort>(page)[0]);
ref var top = ref Unsafe.AsRef(MemoryMarshal.Cast<byte, ushort>(page)[1]);
if (bottom == 0) // init empty page
{
top = 8192; // top of the page
bottom = 4; // leave 4 bytes for the bottom/top header
}
var usableSpace = top - bottom;
Span<byte> temp = stackalloc byte[18]; // max varint = 9 - we need to encode 2
var reqEntryLen = Write7BitEncodedInt64(key, temp);
reqEntryLen += Write7BitEncodedInt64(val, temp[reqEntryLen..]);
var idx = BinarySearch(page, key);
Span<ushort> offsets;
if (idx >= 0)
{
// let's check if we can update?
offsets = OffsetsFor(page);
var entryOffset = offsets[idx];
Read7BitEncodedInt64(page[entryOffset..], out var keyLen);
Read7BitEncodedInt64(page[(entryOffset + keyLen)..], out var valLen);
if (reqEntryLen > keyLen + valLen) // cannot fit, need new allocation
{
if (reqEntryLen > usableSpace)
return false; // we are full, need to split the page
}
else
{
// can fit, just copy & done
temp[..reqEntryLen].CopyTo(page[entryOffset..]);
return true;
}
}
else // increase size of offsets array
{
if (reqEntryLen + sizeof(ushort) > usableSpace)
return false; // we are full, need to split the page
idx = ~idx;
bottom += 2;
offsets = OffsetsFor(page);
offsets[idx..(offsets.Length - 1)].CopyTo(offsets[(idx + 1)..]);
}
top -= (ushort)reqEntryLen;
offsets[idx] = top; // new data location
temp[..reqEntryLen].CopyTo(page[top..]);
return true;
}
public static bool TryGetValue(Span<byte> page, long key, out long val)
{
var idx = BinarySearch(page, key);
if (idx < 0) // update
{
val = default;
return false;
}
var offsets = OffsetsFor(page);
var entryOffset = offsets[idx];
_ = Read7BitEncodedInt64(page[entryOffset..], out var keyLen);
val = Read7BitEncodedInt64(page[(entryOffset + keyLen)..], out _);
return true;
}
}
public static class Nibble
{
private static int BinarySearch(Span<byte> page, long key)
{
var offsets = OffsetsFor(page);
int lo = 0, hi = offsets.Length - 1;
while (lo <= hi)
{
int mid = (lo + hi) >> 1;
var entryOffset = offsets[mid];
var (cur, _) = Decode(page[entryOffset..]);
int cmp = key.CompareTo(cur);
if (cmp == 0)
return mid;
if (cmp < 0)
lo = mid + 1;
else
hi = mid - 1;
}
// where it is *supposed* to go
return ~lo;
}
private static Span<ushort> OffsetsFor(Span<byte> page)
{
var asShorts = MemoryMarshal.Cast<byte, ushort>(page);
var bottom = asShorts[0];
var top = asShorts[1]; // unused here, included for clarity
return MemoryMarshal.CreateSpan(ref asShorts[2], (bottom - 4) / 2);
}
private static int Encode(Span<byte> buffer, long key, long val)
{
var keyLen = (int)(8 - Lzcnt.X64.LeadingZeroCount((ulong)key) / 8);
var valLen = (int)(8 - Lzcnt.X64.LeadingZeroCount((ulong)val) / 8);
buffer[0] = (byte)(keyLen << 4 | valLen);
MemoryMarshal.AsBytes(new Span<long>(ref key))[..keyLen].CopyTo(buffer[1..]);
MemoryMarshal.AsBytes(new Span<long>(ref val))[..valLen].CopyTo(buffer[(keyLen + 1)..]);
return 1 + keyLen + valLen;
}
private static unsafe uint Encode(byte* buffer, long key, long val)
{
var keyLen = (uint)(8 - Lzcnt.X64.LeadingZeroCount((ulong)key) / 8);
var valLen = (uint)(8 - Lzcnt.X64.LeadingZeroCount((ulong)val) / 8);
buffer[0] = (byte)(keyLen << 4 | valLen);
Unsafe.CopyBlock(buffer + 1, &key, keyLen);
Unsafe.CopyBlock(buffer + 1 + keyLen, &val, keyLen);
return 1 + keyLen + valLen;
}
private static int DecodeEntryLength(Span<byte> buffer)
{
var keyLen = buffer[0] >> 4;
var valLen = buffer[0] & 0xF;
return keyLen + valLen + 1;
}
private static (long Key, long Val) Decode(Span<byte> buffer)
{
var keyLen = buffer[0] >> 4;
var valLen = buffer[0] & 0xF;
long k = 0;
long v = 0;
buffer[1..(1 + keyLen)].CopyTo(MemoryMarshal.AsBytes(new Span<long>(ref k)));
buffer[(1 + keyLen)..(1 + keyLen + valLen)].CopyTo(MemoryMarshal.AsBytes(new Span<long>(ref v)));
return (k, v);
}
public static bool Set(Span<byte> page, long key, long val)
{
Debug.Assert(page.Length == 8192);
// header is 4 bytes [bottom, top]
ref var bottom = ref Unsafe.AsRef(MemoryMarshal.Cast<byte, ushort>(page)[0]);
ref var top = ref Unsafe.AsRef(MemoryMarshal.Cast<byte, ushort>(page)[1]);
if (bottom == 0) // init empty page
{
top = 8192; // top of the page
bottom = 4; // leave 4 bytes for the bottom/top header
}
var usableSpace = top - bottom;
Span<byte> temp = stackalloc byte[17]; // max 8 bytes per, plus 1
var reqEntryLen = Encode(temp, key, val);
var idx = BinarySearch(page, key);
Span<ushort> offsets;
if (idx >= 0)
{
// let's check if we can update?
offsets = OffsetsFor(page);
var entryOffset = offsets[idx];
var curEntryLen = DecodeEntryLength(page[entryOffset..]);
if (reqEntryLen <= curEntryLen) // can fit with out needing new allocation
{
// can fit, just copy & done
temp[..reqEntryLen].CopyTo(page[entryOffset..]);
return true;
}
// is there enough space for a new entry?
if (reqEntryLen > usableSpace)
return false;
}
else // increase size of offsets array
{
// is there enough space for a new entry + it's offset?
if (reqEntryLen + sizeof(ushort) > usableSpace)
return false;
idx = ~idx;
bottom += 2;
offsets = OffsetsFor(page);
offsets[idx..(offsets.Length - 1)].CopyTo(offsets[(idx + 1)..]);
}
top -= (ushort)reqEntryLen;
offsets[idx] = top; // new data location
temp[..reqEntryLen].CopyTo(page[top..]);
return true;
}
public static bool TryGetValue(Span<byte> page, long key, out long val)
{
var idx = BinarySearch(page, key);
if (idx < 0) // update
{
val = default;
return false;
}
var offsets = OffsetsFor(page);
var entryOffset = offsets[idx];
(_, val) = Decode(page[entryOffset..]);
return true;
}
}
public static class OffloadedNibble
{
private static int BinarySearch(Span<byte> page, long key)
{
var offsets = OffsetsFor(page);
int lo = 0, hi = offsets.Length - 1;
while (lo <= hi)
{
int mid = (lo + hi) >> 1;
var entryOffset = offsets[mid];
var (cur, _) = Decode(page, entryOffset);
int cmp = key.CompareTo(cur);
if (cmp == 0)
return mid;
if (cmp < 0)
lo = mid + 1;
else
hi = mid - 1;
}
// where it is *supposed* to go
return ~lo;
}
private static Span<ushort> OffsetsFor(Span<byte> page)
{
var asShorts = MemoryMarshal.Cast<byte, ushort>(page);
var bottom = asShorts[0];
var top = asShorts[1]; // unused here, included for clarity
return MemoryMarshal.CreateSpan(ref asShorts[2], (bottom - 4) / 2);
}
private static (int EntrySize, byte SizeNibble) Encode(Span<byte> buffer, long key, long val)
{
var keyLen = (int)(8 - Lzcnt.X64.LeadingZeroCount((ulong)key) / 8);
var valLen = (int)(8 - Lzcnt.X64.LeadingZeroCount((ulong)val) / 8);
byte inlined = (keyLen, valLen) switch
{
(1, 1) => 1,
(2, 2) => 2,
(3, 3) => 3,
(4, 4) => 4,
(2, 1) => 5,
(2, 3) => 6,
(2, 4) => 7,
(3, 2) => 8,
(3, 4) => 9,
(3, 5) => 10,
(4, 2) => 11,
(4, 3) => 12,
(5, 4) => 13,
(5, 5) => 14,
(4, 5) => 15,
// note the trickery here with increment the write offset
_ => 0
};
var writeOffset = 0;
if (inlined == 0) // cannot fit, need to write to entry itself
{
writeOffset = 1;
buffer[0] = (byte)(keyLen << 4 | valLen);
}
MemoryMarshal.AsBytes(new Span<long>(ref key))[..keyLen].CopyTo(buffer[writeOffset..]);
MemoryMarshal.AsBytes(new Span<long>(ref val))[..valLen].CopyTo(buffer[(keyLen + writeOffset)..]);
return (writeOffset + keyLen + valLen, inlined);
}
private static int DecodeEntryLength(Span<byte> buffer)
{
var keyLen = buffer[0] >> 4;
var valLen = buffer[0] & 0xF;
return keyLen + valLen + 1;
}
private static (long Key, long Val) Decode(Span<byte> buffer, ushort offset)
{
var offsetInPage = ((offset & 0xFFF0) >> 3);
var (keyLen, valLen) = (offset & 0xF) switch
{
1 => (1, 1),
2 => (2, 2),
3 => (3, 3),
4 => (4, 4),
5 => (2, 1),
6 => (2, 3),
7 => (2, 4),
8 => (3, 2),
9 => (3, 4),
10 => (3, 5),
11 => (4, 2),
12 => (4, 3),
13 => (5, 4),
14 => (5, 5),
15 => (4, 5),
_ => (buffer[offsetInPage] >> 4, buffer[offsetInPage++] & 0xF), // note, inc offsetInPage
};
long k = 0;
long v = 0;
var entry = buffer.Slice(offsetInPage);
entry[..keyLen].CopyTo(MemoryMarshal.AsBytes(new Span<long>(ref k)));
entry[(keyLen)..(keyLen + valLen)].CopyTo(MemoryMarshal.AsBytes(new Span<long>(ref v)));
return (k, v);
}
static ReadOnlySpan<byte> Table => new byte[16]
{
(1 << 4 | 1),(2 << 4 | 2),(3 << 4 | 3),(4 << 4 | 4),
(2 << 4 | 1),/*dup:(2 << 4 | 2),*/(2 << 4 | 3),(2 << 4 | 4),
/*rare:(3 << 4 | 1),*/(3 << 4 | 2),/*dup:(3 << 4 | 3),*/(3 << 4 | 4),(3 << 4 | 5),
/*rare:(4 << 4 | 1),*/(4 << 4 | 2),(4 << 4 | 3),/*dup:(4 << 4 | 4),*/
(5 << 4 | 4),(5 << 4 | 5),(4 << 4 | 5),
0, // reserved
};
static ReadOnlySpan<byte> OffsetTable => new byte[16]
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ,0 ,0, 0, 1
};
[SkipLocalsInit]
private unsafe static (long Key, long Val) Decode4(byte* buffer, byte indx)
{
// vmovupd xmm0, [addr-in-data-section]
// vpinsrb xmm0, xmm0, [rdx], 0xf
var table = Vector128.Create(Table);
var final = table.WithElement(15, *buffer);
// vmovd xmm1, eax
// vpbroadcastb xmm1, xmm1
var mask = Vector128.Create(indx);
// vpshufb xmm0, xmm0, xmm1
var shuffle = Ssse3.Shuffle(final, mask);
// vpextrb edi, xmm0, 0
int header = Sse41.Extract(shuffle, 0);
buffer += Unsafe.AddByteOffset(ref MemoryMarshal.GetReference(OffsetTable), indx);
var keyLen = (header >> 4);
var valLen = (header & 0xF);
var bufferStart = buffer - 8;
ulong k = *(ulong*)bufferStart;
k ^= ulong.MaxValue << (keyLen * 8);
ulong v = *(ulong*)(bufferStart + keyLen);
v ^= ulong.MaxValue << (valLen * 8);
return ((long)k, (long)v);
}
private unsafe static (long Key, long Val) Decode2(byte* buffer, int indx)
{
var hasOffset = indx == 15;
var header = hasOffset ? *buffer : Table[indx];
var offset = *(byte*)&hasOffset;
var keyLen = (uint)(header >> 4);
var valLen = (uint)(header & 0xF);
long k = 0;
long v = 0;
Unsafe.CopyBlock(&k, buffer + offset, keyLen);
Unsafe.CopyBlock(&v, buffer + offset + keyLen, valLen);
return (k, v);
}
public static bool Set(Span<byte> page, long key, long val)
{
Debug.Assert(page.Length == 8192);
// header is 4 bytes [bottom, top]
ref var bottom = ref Unsafe.AsRef(MemoryMarshal.Cast<byte, ushort>(page)[0]);
ref var top = ref Unsafe.AsRef(MemoryMarshal.Cast<byte, ushort>(page)[1]);
if (bottom == 0) // init empty page
{
top = 8192; // top of the page
bottom = 4; // leave 4 bytes for the bottom/top header
}
var usableSpace = top - bottom;
Span<byte> temp = stackalloc byte[17]; // max 8 bytes per, plus 1
var (reqEntryLen, nibble) = Encode(temp, key, val);
var idx = BinarySearch(page, key);
Span<ushort> offsets;
if (idx >= 0)
{
// let's check if we can update?
offsets = OffsetsFor(page);
var actualEntryOffset = ((offsets[idx] & 0xFFF0) >> 3);
var curEntryLen = DecodeEntryLength(page[actualEntryOffset..]);
if (reqEntryLen <= curEntryLen) // can fit with out needing new allocation
{
// can fit, just copy & done
offsets[idx] = (ushort)(actualEntryOffset << 3 | nibble);
temp[..reqEntryLen].CopyTo(page[actualEntryOffset..]);
return true;
}
// is there enough space for a new entry?
if (reqEntryLen > usableSpace)
return false;
}
else // increase size of offsets array
{
var expectedStart = (top - reqEntryLen) & ~1; // aligned on 2 bytes boundary
// is there enough space for a new entry + it's offset?
if (bottom + sizeof(ushort) > expectedStart)
return false;
idx = ~idx;
bottom += 2;
offsets = OffsetsFor(page);
offsets[idx..(offsets.Length - 1)].CopyTo(offsets[(idx + 1)..]);
}
top = (ushort)((top - reqEntryLen) & ~1); // align on two bytes boundary
offsets[idx] = (ushort)(top << 3 | nibble);
temp[..reqEntryLen].CopyTo(page[top..]);
return true;
}
public static bool TryGetValue(Span<byte> page, long key, out long val)
{
var idx = BinarySearch(page, key);
if (idx < 0) // update
{
val = default;
return false;
}
var offsets = OffsetsFor(page);
var entryOffset = offsets[idx];
(_, val) = Decode(page, entryOffset);
return true;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment