Create a gist now

Instantly share code, notes, and snippets.

Sort one million 8-digit numbers in 1MB RAM
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
typedef unsigned int u32;
typedef unsigned long long u64;
//-------------------------------------------------------------------------
// WorkArea
//-------------------------------------------------------------------------
namespace WorkArea
{
static const u32 circularSize = 253250;
u32 circular[circularSize] = { 0 }; // consumes 1013000 bytes
static const u32 stageSize = 8000;
u32 stage[stageSize]; // consumes 32000 bytes
u32* headPtr = circular;
u32 numDeltas = 0;
u32 numStaged = 0;
}
//-------------------------------------------------------------------------
// Lookup table
//-------------------------------------------------------------------------
static const u32 LUTsize = 64;
const u32 LUT[LUTsize] = {
0x00000000, 0x0288df0d, 0x050b5170, 0x07876772,
0x09fd3131, 0x0c6cbea6, 0x0ed61f9d, 0x113963bd,
0x13969a84, 0x15edd348, 0x183f1d3b, 0x1a8a8766,
0x1cd020ad, 0x1f0ff7cc, 0x214a1b5e, 0x237e99d4,
0x25ad817f, 0x27d6e088, 0x29fac4f7, 0x2c193cad,
0x2e32556d, 0x30461cd1, 0x3254a056, 0x345ded53,
0x366210ff, 0x3861186f, 0x3a5b1096, 0x3c500649,
0x3e40063a, 0x402b1cfa, 0x421156fd, 0x43f2c095,
0x45cf65f7, 0x47a75337, 0x497a944b, 0x4b49350b,
0x4d134131, 0x4ed8c45a, 0x5099ca03, 0x52565d8e,
0x540e8a41, 0x55c25b43, 0x5771dba1, 0x591d1649,
0x5ac41611, 0x5c66e5b1, 0x5e058fc6, 0x5fa01ed4,
0x61369d41, 0x62c9155d, 0x6457915a, 0x65e21b51,
0x6768bd44, 0x68eb8119, 0x6a6a709d, 0x6be59584,
0x6d5cf96c, 0x6ed0a5d9, 0x7040a435, 0x71acfdd4,
0x7315bbf3, 0x747ae7b7, 0x75dc8a2c, 0x773aac4a
};
static const u32 bit2 = 1u << 30;
//-------------------------------------------------------------------------
// Interval
//-------------------------------------------------------------------------
struct Interval
{
u32 lo;
u32 range;
Interval() { lo = 0; range = ~0u; }
void set(u32 l, u32 h) { lo = l; range = h - l; }
u32 lerp(u64 x) { return lo + (u32) ((range * x) >> 32); }
};
//-------------------------------------------------------------------------
// BitReader
//-------------------------------------------------------------------------
struct BitReader
{
u32* readPtr;
u32 readBit;
BitReader()
{
readPtr = WorkArea::circular;
readBit = 32;
}
u32 readOneBit()
{
u32 bit = (*readPtr >> --readBit) & 1;
if (readBit == 0)
{
readBit = 32;
if (++readPtr == WorkArea::circular + WorkArea::circularSize)
readPtr = WorkArea::circular;
}
return bit;
}
};
//-------------------------------------------------------------------------
// Decoder
//-------------------------------------------------------------------------
struct Decoder : BitReader
{
Interval readInterval;
u32 readSeq;
u32 readSeqBits;
Decoder() : BitReader()
{
readSeq = 0;
readSeqBits = 1;
}
u32 decode()
{
for (; readSeqBits < 32; readSeqBits++)
readSeq = (readSeq << 1) | readOneBit();
u32 a = 0;
u32 b = LUTsize;
u32 readRel = readSeq - readInterval.lo;
u32 relA = 0;
u32 relB = readInterval.range;
while (b > a + 1)
{
u32 mid = (a + b) >> 1;
u32 rel = readInterval.lerp(LUT[mid]) - readInterval.lo;
if (readRel >= rel)
{
a = mid;
relA = rel;
}
else
{
b = mid;
relB = rel;
}
}
u32 A = relA + readInterval.lo;
u32 B = relB + readInterval.lo;
assert(A != B);
while ((int) (A ^ B) >= 0)
{
readSeqBits--;
A <<= 1;
B <<= 1;
}
if ((int) readInterval.lo >= 0)
assert((int) A >= 0 && (int) B < 0);
while ((A & bit2) && !(B & bit2))
{
readSeqBits--;
A <<= 1;
B <<= 1;
}
readInterval.set(A, B);
return a;
}
u32 popDelta()
{
u32 value = 0;
for (;;)
{
u32 t = decode();
value += t;
if (t < LUTsize - 1)
return value;
}
}
void reset()
{
readPtr = WorkArea::headPtr;
readBit = 32;
readInterval.set(0, ~0u);
readSeq = 0;
readSeqBits = 0;
}
};
Decoder g_Decoder;
//-------------------------------------------------------------------------
// BitWriter
//-------------------------------------------------------------------------
struct BitWriter
{
u32* writePtr;
u32 writeBit;
BitWriter()
{
writePtr = WorkArea::circular;
writeBit = 32;
}
void writeOneBit(u32 bit) // 0 or 1
{
*writePtr |= bit << --writeBit;
if (writeBit == 0)
{
writeBit = 32;
if (++writePtr == WorkArea::circular + WorkArea::circularSize)
writePtr = WorkArea::circular;
if (writePtr == g_Decoder.readPtr)
abort(); // Overflow detection
*writePtr = 0;
}
}
};
//-------------------------------------------------------------------------
// Encoder
//-------------------------------------------------------------------------
struct Encoder : BitWriter
{
Interval writeInterval;
void addCarry()
{
u32* w = writePtr;
for (u32 b = 2u << (writeBit - 1);; b <<= 1)
{
if (b == 0)
{
b = 1;
if (--w == WorkArea::circular - 1)
w = WorkArea::circular + WorkArea::circularSize - 1;
}
*w ^= b;
if (*w & b)
break;
}
}
void encode(u32 value)
{
u32 A = writeInterval.lerp(LUT[value]);
u32 B = writeInterval.lerp(value < LUTsize - 1 ? LUT[value + 1] : 1ull << 32);
assert(A != B);
if ((int) writeInterval.lo < 0 && (int) A >= 0)
addCarry();
while ((int) (A ^ B) >= 0)
{
writeOneBit(A >> 31);
A <<= 1;
B <<= 1;
}
if ((int) writeInterval.lo >= 0)
assert((int) A >= 0 && (int) B < 0);
while ((A & bit2) && !(B & bit2))
{
writeOneBit(A >> 31);
A <<= 1;
B <<= 1;
}
writeInterval.set(A, B);
}
void pushDelta(u32 delta)
{
while (delta >= LUTsize - 1)
{
encode(LUTsize - 1); // Use the [LUT(63), 1) subinterval
delta -= LUTsize - 1;
}
encode(delta);
}
void flush()
{
encode(LUTsize / 2);
for (u32 i = 32; --i > 0;)
writeOneBit((writeInterval.lo >> i) & 1);
while (writeBit != 32)
writeOneBit(0);
writeInterval.set(0, ~0u);
}
};
Encoder g_Encoder;
//-------------------------------------------------------------------------
// mergeStageToCircular
//-------------------------------------------------------------------------
inline int u32Compare(const u32* a, const u32* b) { return *a - *b; }
void mergeStageToCircular()
{
using namespace WorkArea;
qsort(stage, numStaged, 4, (int (*)(const void*, const void*)) u32Compare);
u32 i = 0;
u32 j = 0;
u32 prev = 0;
u32 iValue = i < numDeltas ? g_Decoder.popDelta() : ~0u;
u32 jValue = j < numStaged ? stage[j] : ~0u;
while (i < numDeltas || j < numStaged)
{
if (iValue < jValue)
{
g_Encoder.pushDelta(iValue - prev);
prev = iValue;
iValue = ++i < numDeltas ? iValue + g_Decoder.popDelta() : ~0u;
}
else
{
g_Encoder.pushDelta(jValue - prev);
prev = jValue;
jValue = ++j < numStaged ? stage[j] : ~0u;
}
}
numDeltas += numStaged;
numStaged = 0;
g_Encoder.flush();
g_Decoder.reset();
headPtr = g_Encoder.writePtr;
}
//-------------------------------------------------------------------------
// main
//-------------------------------------------------------------------------
int main(int argc, char* argv[])
{
// Read input
for (;;)
{
int value;
if (scanf("%d", &value) != 1)
break;
if (WorkArea::numStaged >= WorkArea::stageSize)
mergeStageToCircular();
WorkArea::stage[WorkArea::numStaged++] = value;
}
if (WorkArea::numStaged > 0)
mergeStageToCircular();
// Write output
u32 value = 0;
for (u32 i = 0; i < WorkArea::numDeltas; i++)
{
value += g_Decoder.popDelta();
printf("%08d\n", value);
}
return 0;
}
@preshing
Owner

Changes in revision 028d93:

  • Improved LUT. The old one worked fine, but the new one improves compression by a few bits, and is easier to explain.
  • Renamed Range to Interval.
  • Improved set bit in BitWriter::writeOneBit.
  • No need to subtract one from second argument when calling Interval::set.
  • Tweaked lerp function.
  • Renamed seqA to A, seqB to B.
  • Add missing return statement to main.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment