Last active
February 23, 2016 08:06
-
-
Save cruppstahl/012b89dac240ec5d7859 to your computer and use it in GitHub Desktop.
codeproject.com: An introduction to integer compression
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# requires MaskedVByte from github.com/lemire/MaskedVByte | |
# requires libfor from github.com/cruppstahl/libfor | |
# 'all' target: compiles test.cc into the 'test' executable with g++ (-Wall),
# adds the parent directory to the include path (-I ..) and links against the
# MaskedVByte object files and the libfor static library. Those artifacts are
# not built by this rule, so they must already exist in the sibling directories.
all: | |
g++ test.cc -o test -Wall -I .. \ | |
../MaskedVByte/varintdecode.o \ | |
../MaskedVByte/varintencode.o \ | |
../libfor/libfor.a |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Written by Christoph Rupp, chris@crupp.de | |
// Source code for http://www.codeproject.com/Tips/1080308/An-introduction-to-integer-compression | |
// upscaledb (http://upscaledb.com) is a key/value store with built-in integer compression. | |
#include <iostream> | |
#include <vector> | |
#include <assert.h> | |
#include <stdint.h> | |
#include <libfor/for.h> | |
extern "C" { | |
#include <MaskedVByte/include/varintdecode.h> | |
#include <MaskedVByte/include/varintencode.h> | |
}; | |
static std::vector<uint32_t> input; | |
static void | |
test_for() | |
{ | |
// number of bytes required to compress 'input' | |
uint32_t bytes_reqd = for_compressed_size_sorted(&input[0], input.size()); | |
// allocate storage for the compressed data | |
std::vector<uint8_t> compressed(bytes_reqd); | |
// compress the sequence | |
uint32_t used = for_compress_sorted(&input[0], &compressed[0], input.size()); | |
std::cout << "libfor: compressed " << input.size() << " integers (" | |
<< input.size() * 4 << " bytes) to " | |
<< used << " bytes" << std::endl; | |
// now we can perform operations directly on the compressed data, i.e. | |
// appending a value, searching for a value or selecting a value at | |
// a specific position: | |
uint32_t v = for_select(&compressed[0], 3); | |
std::cout << "integer at position 3: " << v << std::endl; | |
// and of course we can uncompress it again | |
std::vector<uint32_t> output(input.size()); | |
for_uncompress(&compressed[0], &output[0], input.size()); | |
assert(input == output); | |
} | |
static void | |
test_maskedvbyte() | |
{ | |
// Perform a one-time initialization | |
simdvbyteinit(); | |
// allocate storage for the encoded data | |
std::vector<uint8_t> compressed(input.size() * 5); | |
// encode the sequence | |
size_t used = vbyte_encode_delta(&input[0], input.size(), &compressed[0], 0); | |
std::cout << "MVByte: compressed " << input.size() << " integers (" | |
<< input.size() * 4 << " bytes) to " | |
<< used << " bytes" << std::endl; | |
// now we can perform operations directly on the compressed data, i.e. | |
// selecting a value at a specific position: | |
uint32_t v = masked_vbyte_select_delta(&compressed[0], input.size(), 0, 3); | |
std::cout << "integer at position 3: " << v << std::endl; | |
// and of course we can uncompress it again | |
std::vector<uint32_t> output(input.size()); | |
masked_vbyte_decode_delta(&compressed[0], &output[0], input.size(), 0); | |
assert(input == output); | |
} | |
int | |
main() | |
{ | |
// initialize input | |
for (uint32_t i = 0; i < 1000; i++) | |
input.push_back(10 + i); | |
test_for(); | |
test_maskedvbyte(); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment