Skip to content

Instantly share code, notes, and snippets.

@cruppstahl cruppstahl/Makefile
Last active Feb 23, 2016

Embed
What would you like to do?
codeproject.com: An introduction to integer compression
# requires MaskedVByte from github.com/lemire/MaskedVByte
# requires libfor from github.com/cruppstahl/libfor
# Default target: compiles test.cc into the executable 'test' with all
# warnings enabled (-Wall). '-I ..' adds the parent directory to the include
# search path, and the link inputs are the two MaskedVByte object files and
# the libfor static archive located in sibling directories. The rule lists
# no prerequisites, so those objects/archive have to exist already.
all:
g++ test.cc -o test -Wall -I .. \
../MaskedVByte/varintdecode.o \
../MaskedVByte/varintencode.o \
../libfor/libfor.a
// Written by Christoph Rupp, chris@crupp.de
// Source code for http://www.codeproject.com/Tips/1080308/An-introduction-to-integer-compression
// upscaledb (http://upscaledb.com) is a key/value store with built-in integer compression.
#include <iostream>
#include <vector>
#include <assert.h>
#include <stdint.h>
#include <libfor/for.h>
extern "C" {
#include <MaskedVByte/include/varintdecode.h>
#include <MaskedVByte/include/varintencode.h>
};
static std::vector<uint32_t> input;
// Runs the global 'input' sequence through libfor: compresses it, prints the
// size before and after, reads one element straight out of the compressed
// buffer, then uncompresses and checks that the round trip is lossless.
static void
test_for()
{
  uint32_t *values = &input[0];
  const size_t count = input.size();

  // ask libfor how many bytes the compressed form needs, then reserve
  // exactly that much output space
  const uint32_t capacity = for_compressed_size_sorted(values, count);
  std::vector<uint8_t> packed(capacity);
  uint8_t *packed_data = &packed[0];

  // run the compression and report the sizes
  const uint32_t packed_bytes = for_compress_sorted(values, packed_data, count);
  std::cout << "libfor: compressed " << count << " integers (";
  std::cout << count * 4 << " bytes) to ";
  std::cout << packed_bytes << " bytes" << std::endl;

  // the compressed buffer can be queried without unpacking it first;
  // here the element at index 3 is fetched
  const uint32_t selected = for_select(packed_data, 3);
  std::cout << "integer at position 3: " << selected << std::endl;

  // unpack everything again and compare against the original sequence
  std::vector<uint32_t> restored(count);
  for_uncompress(packed_data, &restored[0], count);
  assert(input == restored);
}
// Runs the global 'input' sequence through MaskedVByte's delta codec: encodes
// it, prints the size before and after, reads one element straight out of the
// encoded buffer, then decodes and checks that the round trip is lossless.
static void
test_maskedvbyte()
{
  // the library wants this set-up call made once before it is used
  simdvbyteinit();

  uint32_t *values = &input[0];
  const size_t count = input.size();

  // output buffer with 5 bytes per input integer
  // (presumably the worst case for one encoded 32-bit value -- confirm
  // against the MaskedVByte documentation)
  std::vector<uint8_t> encoded(count * 5);
  uint8_t *encoded_data = &encoded[0];

  // run the encoder and report the sizes
  const size_t encoded_bytes = vbyte_encode_delta(values, count, encoded_data, 0);
  std::cout << "MVByte: compressed " << count << " integers (";
  std::cout << count * 4 << " bytes) to ";
  std::cout << encoded_bytes << " bytes" << std::endl;

  // the encoded buffer can be queried without decoding it first;
  // here the element at index 3 is fetched
  const uint32_t selected = masked_vbyte_select_delta(encoded_data, count, 0, 3);
  std::cout << "integer at position 3: " << selected << std::endl;

  // decode everything again and compare against the original sequence
  std::vector<uint32_t> decoded(count);
  masked_vbyte_decode_delta(encoded_data, &decoded[0], count, 0);
  assert(input == decoded);
}
// Program entry point: fills the global 'input' with 1000 consecutive
// integers starting at 10, then runs the libfor and MaskedVByte demos.
int
main()
{
  // populate the shared sequence with 10, 11, ..., 1009
  for (uint32_t value = 10; value != 1010; ++value)
    input.push_back(value);

  test_for();
  test_maskedvbyte();
  return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.