Skip to content

Instantly share code, notes, and snippets.

@bsergean
Last active September 29, 2020 05:03
Show Gist options
  • Save bsergean/891b792f6900078db67145b05ffaca48 to your computer and use it in GitHub Desktop.
Save bsergean/891b792f6900078db67145b05ffaca48 to your computer and use it in GitHub Desktop.
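/*
 * Benchmark results: median time, in microseconds, to gzip-compress one sample
 * file with each backend (zlib, zlib-ng, libdeflate); lower is better.
 * The build commands and the raw program output follow the histogram.
 *
# each * represents a count of 40. total 5823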
zlib [ 2775] *********************************************************************
zlib-ng [ 1808] *********************************************
libdeflate [ 1240] *******************************
*
$ clang++ -DHAVE_LIBDEFLATE=1 -O3 --std=c++14 --stdlib=libc++ gzip.cpp /usr/local/lib/libdeflate.a && ./a.out ~/Desktop/example_data.bin
median runtime to compress file: 1240
compressing file completed in 1225 us
$ clang++ -O3 --std=c++14 --stdlib=libc++ gzip.cpp /usr/local/lib/libz.a && ./a.out ~/Desktop/example_data.bin
median runtime to compress file: 1808
compressing file completed in 1848 us
$ clang++ -O3 --std=c++14 --stdlib=libc++ gzip.cpp /usr/lib/libz.dylib && ./a.out ~/Desktop/example_data.bin
median runtime to compress file: 2775
compressing file completed in 2999 us
*/
#include <stdint.h>
#include <string.h>

#include <algorithm>
#include <array>
#include <chrono>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <new>
#include <string>
#include <utility>
#include <vector>

#include <zlib.h>
#ifdef HAVE_LIBDEFLATE
#include <libdeflate.h>
#endif
/// Small stopwatch that measures elapsed wall-clock time in microseconds.
///
/// Timing starts at construction and restarts on every reset(). record()
/// stores the elapsed time silently; report() stores it and also prints it to
/// std::cerr. If the bench has not been marked as reported when it is
/// destroyed, the destructor prints a report.
class Bench
{
public:
    /// @param description Label printed in front of the timing by report().
    Bench(const std::string& description);

    /// Calls report() unless the bench is currently marked as reported.
    ~Bench();

    /// Restarts the clock and clears the "reported" flag.
    void reset();

    /// Stores the time elapsed since the last reset(), without printing.
    void record();

    /// Stores the elapsed time, prints it to std::cerr and marks the bench as
    /// reported.
    void report();

    /// Marks the bench as reported so that the destructor stays silent.
    void setReported();

    /// @return The duration, in microseconds, stored by the last record() or
    ///         report() call; 0 if neither has been called yet.
    uint64_t getDuration() const;

private:
    std::string _description;
    std::chrono::time_point<std::chrono::high_resolution_clock> _start;

    // Default-initialised so that getDuration() and the destructor never read
    // an indeterminate value, whatever the constructor does.
    uint64_t _duration = 0;
    bool _reported = false;
};
Bench::Bench(const std::string& description)
: _description(description)
{
reset();
}
/// Prints a final report unless one was already emitted (or suppressed via
/// setReported()).
Bench::~Bench()
{
    if (_reported)
    {
        return;
    }

    report();
}
/// Restarts the measurement: clears the "reported" flag and takes a fresh
/// start timestamp.
void Bench::reset()
{
    using Clock = std::chrono::high_resolution_clock;

    _reported = false;
    _start = Clock::now();
}
void Bench::report()
{
auto now = std::chrono::high_resolution_clock::now();
auto microseconds = std::chrono::duration_cast<std::chrono::microseconds>(now - _start);
_duration = microseconds.count();
std::cerr << _description << " completed in " << _duration << " us" << std::endl;
setReported();
}
/// Stores the number of whole microseconds elapsed since the last reset(),
/// without printing anything.
void Bench::record()
{
    using std::chrono::duration_cast;
    using std::chrono::high_resolution_clock;
    using std::chrono::microseconds;

    const auto elapsed = high_resolution_clock::now() - _start;
    _duration = duration_cast<microseconds>(elapsed).count();
}
void Bench::setReported()
{
_reported = true;
}
/// @return The duration, in microseconds, stored by the most recent record()
///         or report() call.
uint64_t Bench::getDuration() const
{
    const uint64_t lastDuration = _duration;
    return lastDuration;
}
std::string gzipCompress(const std::string& str)
{
#ifdef HAVE_LIBDEFLATE
int compressionLevel = 6;
struct libdeflate_compressor *compressor;
compressor =
libdeflate_alloc_compressor(compressionLevel);
const void *uncompressed_data = str.data();
size_t uncompressed_size = str.size();
void *compressed_data;
size_t actual_compressed_size;
size_t max_compressed_size;
max_compressed_size = libdeflate_gzip_compress_bound(compressor,
uncompressed_size);
compressed_data = malloc(max_compressed_size);
if (compressed_data == NULL)
{
return std::string();
}
actual_compressed_size = libdeflate_gzip_compress(
compressor,
uncompressed_data,
uncompressed_size,
compressed_data,
max_compressed_size);
if (actual_compressed_size == 0)
{
free(compressed_data);
return std::string();
}
libdeflate_free_compressor(compressor);
std::string out;
out.append(reinterpret_cast<char*>(compressed_data), actual_compressed_size);
free(compressed_data);
return out;
#else
z_stream zs; // z_stream is zlib's control structure
memset(&zs, 0, sizeof(zs));
// deflateInit2 configure the file format: request gzip instead of deflate
const int windowBits = 15;
const int GZIP_ENCODING = 16;
deflateInit2(&zs,
Z_DEFAULT_COMPRESSION,
Z_DEFLATED,
windowBits | GZIP_ENCODING,
8,
Z_DEFAULT_STRATEGY);
zs.next_in = (Bytef*) str.data();
zs.avail_in = (uInt) str.size(); // set the z_stream's input
int ret;
char outbuffer[32768];
std::string outstring;
// retrieve the compressed bytes blockwise
do
{
zs.next_out = reinterpret_cast<Bytef*>(outbuffer);
zs.avail_out = sizeof(outbuffer);
ret = deflate(&zs, Z_FINISH);
if (outstring.size() < zs.total_out)
{
// append the block to the output string
outstring.append(outbuffer, zs.total_out - outstring.size());
}
} while (ret == Z_OK);
deflateEnd(&zs);
return outstring;
#endif
}
/// Reads a whole file into memory as raw bytes.
///
/// The file is opened in binary mode so that no newline translation alters
/// the data.
///
/// @param path Path of the file to read.
/// @return {true, contents} on success (contents is empty for an empty file);
///         {false, empty vector} if the file cannot be opened, its size cannot
///         be determined, or it cannot be read completely.
std::pair<bool, std::vector<uint8_t>> load(const std::string& path)
{
    std::vector<uint8_t> memblock;

    std::ifstream file(path, std::ios::in | std::ios::binary);
    if (!file.is_open()) return std::make_pair(false, memblock);

    // Measure the file by seeking to its end, then rewind.
    file.seekg(0, std::ios::end);
    const std::streamoff size = file.tellg();
    file.seekg(0, std::ios::beg);

    // tellg() reports failure as -1; never turn that into a huge allocation.
    if (size < 0 || !file)
    {
        return std::make_pair(false, memblock);
    }

    memblock.resize(static_cast<size_t>(size));

    // Nothing to read for an empty file (and no element to take the address of).
    if (!memblock.empty())
    {
        file.read(reinterpret_cast<char*>(memblock.data()),
                  static_cast<std::streamsize>(size));
        if (file.gcount() != static_cast<std::streamsize>(size))
        {
            memblock.clear();
            return std::make_pair(false, memblock);
        }
    }

    return std::make_pair(true, std::move(memblock));
}
/// Reads a whole file into a std::string.
///
/// @param path Path of the file to read.
/// @return The success flag reported by load(), paired with the bytes that
///         load() returned, converted to a string.
std::pair<bool, std::string> readAsString(const std::string& path)
{
    const auto res = load(path);

    // Bind by reference: copying the vector first would duplicate the whole
    // file contents just to throw the copy away.
    const std::vector<uint8_t>& bytes = res.second;
    return std::make_pair(res.first, std::string(bytes.begin(), bytes.end()));
}
/// Benchmarks gzip compression of one file and writes the compressed result
/// next to it as "<filename>.gz".
///
/// The file is compressed runCount times; the median of the per-run durations
/// (microseconds) is printed to std::cout.
///
/// @param filename Path of the file to compress.
/// @param runCount Number of timed compression runs; must be positive.
/// @return 0 on success; 1 if runCount is not positive, the input cannot be
///         read, or the output cannot be written.
int gzip(const std::string& filename, int runCount)
{
    if (runCount <= 0)
    {
        std::cerr << "run count must be positive" << std::endl;
        return 1;
    }

    auto res = readAsString(filename);
    if (!res.first)
    {
        std::cerr << "cannot read " << filename << std::endl;
        return 1;
    }

    std::string compressedBytes;
    std::vector<uint64_t> durations;
    durations.reserve(static_cast<size_t>(runCount));

    {
        Bench bench("compressing file");
        bench.setReported();

        // reset() clears the "reported" flag again, so the bench still prints
        // one final timing line to std::cerr when it leaves this scope.
        for (int i = 0; i < runCount; ++i)
        {
            bench.reset();
            compressedBytes = gzipCompress(res.second);
            bench.record();
            durations.push_back(bench.getDuration());
        }

        // The runs arrive in execution order; the median is only the middle
        // element once the samples are sorted.
        std::sort(durations.begin(), durations.end());
        const uint64_t medianRuntime = durations[durations.size() / 2];

        std::cout << "median runtime to compress file: "
                  << medianRuntime << std::endl;
    }

    // Binary mode: the gzip stream must be written byte for byte.
    const std::string outputFilename = filename + ".gz";
    std::ofstream f(outputFilename, std::ios::out | std::ios::binary);
    if (!f.is_open())
    {
        std::cerr << "cannot open " << outputFilename << " for writing" << std::endl;
        return 1;
    }

    f.write(compressedBytes.data(), static_cast<std::streamsize>(compressedBytes.size()));
    f.close();
    if (!f)
    {
        std::cerr << "cannot write " << outputFilename << std::endl;
        return 1;
    }

    return 0;
}
/// Entry point: benchmarks gzip compression of the file named by the first
/// command-line argument, using 100 timed runs.
///
/// @return The result of gzip(), or 1 if no file argument was given.
int main(int argc, char** argv)
{
    // argv[1] is a null pointer when no argument is given; building a
    // std::string from it would be undefined behaviour.
    if (argc < 2)
    {
        std::cerr << "usage: " << (argc > 0 ? argv[0] : "gzip") << " <file>" << std::endl;
        return 1;
    }

    return gzip(argv[1], 100);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment