Skip to content

Instantly share code, notes, and snippets.

@cruppstahl
Last active February 23, 2016 08:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cruppstahl/012b89dac240ec5d7859 to your computer and use it in GitHub Desktop.
Save cruppstahl/012b89dac240ec5d7859 to your computer and use it in GitHub Desktop.
codeproject.com: An introduction to integer compression
# requires MaskedVByte from github.com/lemire/MaskedVByte
# requires libfor from github.com/cruppstahl/libfor
# Builds the 'test' binary from test.cc. The MaskedVByte object files and the
# libfor archive are expected to be already built in sibling directories.
all:
	g++ test.cc -o test -Wall -I .. \
		../MaskedVByte/varintdecode.o \
		../MaskedVByte/varintencode.o \
		../libfor/libfor.a
// Written by Christoph Rupp, chris@crupp.de
// Source code for http://www.codeproject.com/Tips/1080308/An-introduction-to-integer-compression
// upscaledb (http://upscaledb.com) is a key/value store with built-in integer compression.
#include <iostream>
#include <vector>
#include <assert.h>
#include <stdint.h>
#include <libfor/for.h>
extern "C" {
#include <MaskedVByte/include/varintdecode.h>
#include <MaskedVByte/include/varintencode.h>
};
// Shared test data: the integer sequence that both test functions compress
// and decompress again. It is filled in main() before the tests run.
static std::vector<uint32_t> input;
// Runs the libfor demo on the global 'input' sequence: compresses it, reports
// the size, reads a single value out of the compressed block and finally
// checks that uncompressing yields the original sequence again.
static void
test_for()
{
  uint32_t *plain = &input[0];
  const size_t count = input.size();

  // size the destination buffer with the byte count libfor reports for 'input'
  const uint32_t capacity = for_compressed_size_sorted(plain, count);
  std::vector<uint8_t> packed(capacity);
  uint8_t *block = &packed[0];

  // run the actual compression and report the result
  const uint32_t written = for_compress_sorted(plain, block, count);
  std::cout << "libfor: compressed " << count << " integers (";
  std::cout << count * 4 << " bytes) to ";
  std::cout << written << " bytes" << std::endl;

  // the compressed block can be queried in place (append, search, select);
  // here the value stored at index 3 is fetched without uncompressing
  const uint32_t picked = for_select(block, 3);
  std::cout << "integer at position 3: " << picked << std::endl;

  // roundtrip: uncompress into a fresh buffer and compare with the source
  std::vector<uint32_t> output(count);
  for_uncompress(block, &output[0], count);
  assert(input == output);
}
// Runs the MaskedVByte demo on the global 'input' sequence: delta-encodes it,
// reports the size, selects a single value from the encoded stream and
// finally checks that decoding yields the original sequence again.
static void
test_maskedvbyte()
{
  // the library needs to be initialized once before it is used
  simdvbyteinit();

  uint32_t *plain = &input[0];
  const size_t count = input.size();

  // five bytes are reserved per integer (presumably the worst case for a
  // vbyte-encoded 32bit value)
  std::vector<uint8_t> packed(count * 5);
  uint8_t *stream = &packed[0];

  // encode the sequence; the trailing 0 is presumably the initial "previous"
  // value of the delta chain - see the MaskedVByte headers
  const size_t written = vbyte_encode_delta(plain, count, stream, 0);
  std::cout << "MVByte: compressed " << count << " integers (";
  std::cout << count * 4 << " bytes) to ";
  std::cout << written << " bytes" << std::endl;

  // the encoded stream can be queried in place; here the value stored at
  // index 3 is fetched without decoding everything
  const uint32_t picked = masked_vbyte_select_delta(stream, count, 0, 3);
  std::cout << "integer at position 3: " << picked << std::endl;

  // roundtrip: decode into a fresh buffer and compare with the source
  std::vector<uint32_t> output(count);
  masked_vbyte_decode_delta(stream, &output[0], count, 0);
  assert(input == output);
}
// Entry point: fills 'input' with 1000 consecutive integers starting at 10
// and then runs the libfor and the MaskedVByte demo on that sequence.
int
main()
{
  // generate the ascending values 10, 11, ..., 1009
  const uint32_t first = 10;
  const uint32_t limit = first + 1000;
  for (uint32_t value = first; value < limit; ++value) {
    input.push_back(value);
  }

  test_for();
  test_maskedvbyte();

  return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment