Skip to content

Instantly share code, notes, and snippets.

@Geograph-us
Created November 11, 2023 15:02
Show Gist options
  • Save Geograph-us/6b82845a6112e690862cba09c00a285d to your computer and use it in GitHub Desktop.
Save Geograph-us/6b82845a6112e690862cba09c00a285d to your computer and use it in GitHub Desktop.
ulong (UInt64) compression based on https://github.com/wolfgarbe/EliasFanoCompression
byte[] EliasFanoCompress(ulong[] UlongsArray)
{
long maxCompressedSize = (long)((2 + Math.Ceiling(Math.Log((double)UlongsArray[UlongsArray.Length - 1] / (double)UlongsArray.Length, 2))) * UlongsArray.Length / 8) + 6;
byte[] compressedBuffer = new byte[maxCompressedSize];
ulong compressedBufferPointer2 = 0;
ulong compressedBufferPointer1 = 0;
UInt64 lastID = 0;
UInt64 buffer1 = 0;
UInt64 bufferLength1 = 0;
UInt64 buffer2 = 0;
UInt64 bufferLength2 = 0;
ulong largestblockID = (ulong)UlongsArray[UlongsArray.Length - 1];
double averageDelta = (double)largestblockID / (double)UlongsArray.Length;
double averageDeltaLog = Math.Log(averageDelta, 2);
ulong lowBitsLength = (ulong)Math.Floor(averageDeltaLog);
if (lowBitsLength < 0) lowBitsLength = 0;
ulong lowBitsMask = (((ulong)1 << (int)lowBitsLength) - 1);
compressedBufferPointer2 = lowBitsLength * (ulong)UlongsArray.Length / 8 + 6;
compressedBuffer[compressedBufferPointer1++] = (byte)(UlongsArray.Length & 255);
compressedBuffer[compressedBufferPointer1++] = (byte)((UlongsArray.Length >> 8) & 255);
compressedBuffer[compressedBufferPointer1++] = (byte)((UlongsArray.Length >> 16) & 255);
compressedBuffer[compressedBufferPointer1++] = (byte)((UlongsArray.Length >> 24) & 255);
compressedBuffer[compressedBufferPointer1++] = (byte)lowBitsLength;
foreach (ulong docID in UlongsArray)
{
ulong docIdDelta = (docID - lastID - 1);
buffer1 <<= (int)lowBitsLength;
buffer1 |= (docIdDelta & lowBitsMask);
bufferLength1 += lowBitsLength;
while (bufferLength1 > 7)
{
bufferLength1 -= 8;
compressedBuffer[compressedBufferPointer1++] = (byte)(buffer1 >> (int)bufferLength1);
}
ulong unaryCodeLength = (ulong)(docIdDelta >> (int)lowBitsLength) + 1;
buffer2 <<= (int)unaryCodeLength;
buffer2 |= 1;
bufferLength2 += unaryCodeLength;
while (bufferLength2 > 7)
{
bufferLength2 -= 8;
compressedBuffer[compressedBufferPointer2++] = (byte)(buffer2 >> (int)bufferLength2);
}
lastID = docID;
}
if (bufferLength1 > 0) compressedBuffer[compressedBufferPointer1++] = (byte)(buffer1 << (int)(8 - bufferLength1));
if (bufferLength2 > 0) compressedBuffer[compressedBufferPointer2++] = (byte)(buffer2 << (int)(8 - bufferLength2));
Array.Resize(ref compressedBuffer, (int)compressedBufferPointer2);
return compressedBuffer;
}
public static UInt64[,] decodingTableHighBits = new UInt64[256, 8];
public static byte[] decodingTableNumber = new byte[256];
public static byte[] decodingTableHighBitsCarryover = new byte[256];
UInt64[] EliasFanoDecompress(byte[] compressedBuffer)
{
if (decodingTableHighBitsCarryover[0] == 0)
{
for (int i = 0; i < 256; i++)
{
byte zeroCount = 0;
for (int j = 7; j >= 0; j--)
{
if ((i & (1 << j)) > 0)
{
decodingTableHighBits[i, decodingTableNumber[i]] = zeroCount;
decodingTableNumber[i]++;
zeroCount = 0;
}
else zeroCount++;
}
decodingTableHighBitsCarryover[i] = zeroCount;
}
}
UInt64 compressedBufferPointer = (ulong)compressedBuffer.LongLength;
UInt64 resultPointer = 0;
ulong lowBitsPointer = 0;
ulong lastDocID = 0;
ulong docID = 0;
UInt64 postingListCount = compressedBuffer[lowBitsPointer++];
postingListCount |= (UInt64)compressedBuffer[lowBitsPointer++] << 8;
postingListCount |= (UInt64)compressedBuffer[lowBitsPointer++] << 16;
postingListCount |= (UInt64)compressedBuffer[lowBitsPointer++] << 24;
ulong[] decompressedUlongs = new ulong[postingListCount];
byte lowBitsLength = compressedBuffer[lowBitsPointer++];
byte lowBitsCount = 0;
byte lowBits = 0;
byte cb = 1;
for (UInt64 highBitsPointer = lowBitsLength * postingListCount / 8 + 6; highBitsPointer < compressedBufferPointer; highBitsPointer++)
{
docID += decodingTableHighBitsCarryover[cb];
cb = compressedBuffer[highBitsPointer];
UInt64 docIdNumber = decodingTableNumber[cb];
for (UInt64 i = 0; i < docIdNumber; i++)
{
docID <<= lowBitsCount;
docID |= lowBits & ((1u << lowBitsCount) - 1u);
while (lowBitsCount < lowBitsLength)
{
docID <<= 8;
lowBits = compressedBuffer[lowBitsPointer++];
docID |= lowBits;
lowBitsCount += 8;
}
lowBitsCount -= lowBitsLength;
docID >>= lowBitsCount;
docID += (decodingTableHighBits[cb, i] << lowBitsLength) + lastDocID + 1u;
decompressedUlongs[resultPointer++] = docID;
lastDocID = docID;
docID = 0;
}
}
return decompressedUlongs;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment