Skip to content

Instantly share code, notes, and snippets.

@terrelln
Last active September 26, 2023 19:17
Show Gist options
  • Save terrelln/1374bf868af850e7ee1ff7cd460a0868 to your computer and use it in GitHub Desktop.
Save terrelln/1374bf868af850e7ee1ff7cd460a0868 to your computer and use it in GitHub Desktop.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <stdexcept>
#include <string>
#include <string_view>
#include <vector>
#include <folly/ScopeGuard.h>
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>
namespace {
// Passes a zstd return value through unchanged, unless zstd classifies it as
// an error code; in that case the zstd error name is thrown as a
// std::runtime_error.
size_t throwIfZstdError(size_t result)
{
    if (!ZSTD_isError(result)) {
        return result;
    }
    throw std::runtime_error{ZSTD_getErrorName(result)};
}
// Bytes reserved per zstd block header when budgeting a chunk.
constexpr size_t kBlockHeaderSize{3};
// Smallest chunk size accepted: the worst-case compressed size of one
// maximum-size zstd block.
constexpr size_t kMinChunkSize{ZSTD_COMPRESSBOUND(ZSTD_BLOCKSIZE_MAX)};
/**
 * Compresses a prefix of `data` into one chunk of at most `chunkSize` bytes.
 *
 * The chunk starts with a single complete zstd frame. If input remains after
 * that frame is ended, the rest of the chunk is filled with a zero-filled
 * skippable frame so the chunk is exactly `chunkSize` bytes. The chunk that
 * consumes the end of the input is not padded and may be shorter.
 *
 * @param cctx      Compression context. Its session is reset before use
 *                  (ZSTD_reset_session_only).
 * @param data      In/out view of the remaining input; on return it has been
 *                  advanced past the bytes compressed into this chunk.
 * @param chunkSize Size of the chunk to produce; must be >= kMinChunkSize.
 * @return The bytes of the chunk.
 * @throws std::runtime_error if `chunkSize` is too small, if a zstd call
 *         reports an error, or if the finished frame leaves no room for the
 *         padding frame header.
 */
std::string compressFixedSizeChunk(ZSTD_CCtx* cctx, std::string_view& data, size_t chunkSize)
{
    if (chunkSize < kMinChunkSize) {
        // Avoid problems where the frame overhead is too large to fit in a chunk
        throw std::runtime_error{"This function is intended to work with large chunk sizes"};
    }
    std::string compressed(chunkSize, '\0');
    // Start a fresh frame; surface a failed reset instead of ignoring it.
    throwIfZstdError(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only));

    // Write the compressed portion of the chunk
    ZSTD_outBuffer out = { compressed.data(), compressed.size(), 0 };
    ZSTD_inBuffer in = { data.data(), 0, 0 };
    size_t inPos = 0;
    // Budget for the frame: leave room for the skippable frame header and two
    // block headers (presumably one for the last data block overshooting its
    // budget when stored uncompressed, and one for the block that ends the frame).
    size_t const maxCompressedSize = chunkSize - ZSTD_SKIPPABLEHEADERSIZE - 2 * kBlockHeaderSize;
    while (out.pos < maxCompressedSize && inPos < data.size()) {
        // Compute a block size that guarantees we won't surpass chunkSize
        size_t blockSize = std::min<size_t>(data.size() - inPos, maxCompressedSize - out.pos);
        blockSize = std::min<size_t>(blockSize, ZSTD_BLOCKSIZE_MAX);
        // Expose one more block of input to zstd by growing the input window.
        in.size += blockSize;
        // Flush until zstd reports that nothing is left to write.
        for (;;) {
            size_t const result = throwIfZstdError(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush));
            if (result == 0) {
                break;
            }
        }
        assert(in.pos == in.size);
        inPos += blockSize;
    }

    // End the frame
    for (;;) {
        assert(in.pos == in.size);
        size_t const result = throwIfZstdError(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end));
        if (result == 0) {
            break;
        }
    }
    assert(out.pos + ZSTD_SKIPPABLEHEADERSIZE <= chunkSize);

    // Pad all but the last chunk with a skippable frame
    if (inPos != data.size()) {
        // Checked at runtime as well: with asserts compiled out, the unsigned
        // subtraction below would otherwise wrap around.
        if (out.pos + ZSTD_SKIPPABLEHEADERSIZE > chunkSize) {
            throw std::runtime_error{"Compressed frame leaves no room for the skippable padding frame"};
        }
        size_t const skippableSize = chunkSize - ZSTD_SKIPPABLEHEADERSIZE - out.pos;
        std::string const zeros(skippableSize, '\0');
        // ZSTD_writeSkippableFrame takes a trailing magicVariant argument;
        // 0 selects the base skippable magic number.
        unsigned const magicVariant = 0;
        out.pos += throwIfZstdError(ZSTD_writeSkippableFrame(
            compressed.data() + out.pos, out.size - out.pos, zeros.data(), zeros.size(), magicVariant));
        assert(out.pos == chunkSize);
    }
    assert(out.pos <= chunkSize);
    compressed.resize(out.pos);

    // Consume the input
    data = data.substr(inPos);
    return compressed;
}
}
/**
 * Compresses `data` into a sequence of chunks by repeatedly calling
 * compressFixedSizeChunk until the input is exhausted.
 *
 * @param data      Input bytes to compress. Empty input yields an empty vector.
 * @param chunkSize Chunk size forwarded to compressFixedSizeChunk.
 * @param level     Value set for ZSTD_c_compressionLevel (zstd documents 0 as
 *                  "use the default level").
 * @return The chunks, in input order.
 * @throws std::runtime_error if the compression context cannot be created, if
 *         a zstd call reports an error, or if compressFixedSizeChunk rejects
 *         `chunkSize`.
 */
std::vector<std::string> compressWithFixedSizeChunks(std::string_view data, size_t chunkSize, int level = 0)
{
    ZSTD_CCtx* cctx = ZSTD_createCCtx();
    // ZSTD_createCCtx returns NULL on allocation failure; fail before the
    // context is handed to any other zstd call.
    if (cctx == nullptr) {
        throw std::runtime_error{"Failed to create ZSTD_CCtx"};
    }
    SCOPE_EXIT {
        ZSTD_freeCCtx(cctx);
    };

    // Set compression parameters
    throwIfZstdError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level));
    // Configure Zstd to remove all overhead from streaming compression
    throwIfZstdError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_stableInBuffer, 1));
    throwIfZstdError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_stableOutBuffer, 1));

    std::vector<std::string> chunks;
    // Each call consumes a prefix of `data` and advances the view.
    while (!data.empty()) {
        chunks.push_back(compressFixedSizeChunk(cctx, data, chunkSize));
    }
    return chunks;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment