Last active
September 26, 2023 19:17
-
-
Save terrelln/1374bf868af850e7ee1ff7cd460a0868 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <stdexcept>
#include <string>
#include <string_view>
#include <vector>

#include <folly/ScopeGuard.h>

#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>
namespace { | |
size_t throwIfZstdError(size_t result) | |
{ | |
if (ZSTD_isError(result)) { | |
throw std::runtime_error{ZSTD_getErrorName(result)}; | |
} | |
return result; | |
} | |
size_t constexpr kBlockHeaderSize = 3; | |
size_t constexpr kMinChunkSize = ZSTD_COMPRESSBOUND(ZSTD_BLOCKSIZE_MAX); | |
std::string compressFixedSizeChunk(ZSTD_CCtx* cctx, std::string_view& data, size_t chunkSize) | |
{ | |
if (chunkSize < kMinChunkSize) { | |
// Avoid problems where the frame overhead is too large to fit in a chunk | |
throw std::runtime_error{"This function is intended to work with large chunk sizes"}; | |
} | |
std::string compressed(chunkSize, '\0'); | |
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); | |
// Write the compressed porition of the block | |
ZSTD_outBuffer out = { compressed.data(), compressed.size(), 0 }; | |
ZSTD_inBuffer in = { data.data(), 0, 0 }; | |
size_t inPos = 0; | |
size_t const maxCompressedSize = chunkSize - ZSTD_SKIPPABLEHEADERSIZE - 2 * kBlockHeaderSize; | |
while (out.pos < maxCompressedSize && inPos < data.size()) { | |
// Compute a block size that guarantees we won't surpass chunkSize | |
size_t blockSize = std::min<size_t>(data.size() - inPos, maxCompressedSize - out.pos); | |
blockSize = std::min<size_t>(blockSize, ZSTD_BLOCKSIZE_MAX); | |
in.size += blockSize; | |
for (;;) { | |
size_t const result = throwIfZstdError(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush)); | |
if (result == 0) { | |
break; | |
} | |
} | |
assert(in.pos == in.size); | |
inPos += blockSize; | |
} | |
// End the frame | |
for (;;) { | |
assert(in.pos == in.size); | |
size_t const result = throwIfZstdError(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end)); | |
if (result == 0) { | |
break; | |
} | |
} | |
assert(out.pos + ZSTD_SKIPPABLEHEADERSIZE <= chunkSize); | |
// Pad all but the last chunk with a skippable frame | |
if (inPos != data.size()) { | |
size_t const skippableSize = chunkSize - ZSTD_SKIPPABLEHEADERSIZE - out.pos; | |
std::string zeros(skippableSize, '\0'); | |
out.pos += throwIfZstdError(ZSTD_writeSkippableFrame(compressed.data() + out.pos, out.size - out.pos, zeros.data(), zeros.size())); | |
assert(out.pos == chunkSize); | |
} | |
assert(out.pos <= chunkSize); | |
compressed.resize(out.pos); | |
// Consume the input | |
data = data.substr(inPos); | |
return compressed; | |
} | |
} | |
std::vector<std::string> compressWithFixedSizeChunks(std::string_view data, size_t chunkSize, int level = 0) | |
{ | |
ZSTD_CCtx* cctx = ZSTD_createCCtx(); | |
SCOPE_EXIT { | |
ZSTD_freeCCtx(cctx); | |
}; | |
// Set compression parameters | |
throwIfZstdError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level)); | |
// Configure Zstd to remove all overhead from streaming compression | |
throwIfZstdError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_stableInBuffer, 1)); | |
throwIfZstdError(ZSTD_CCtx_setParameter(cctx, ZSTD_c_stableOutBuffer, 1)); | |
std::vector<std::string> chunks; | |
while (!data.empty()) { | |
chunks.push_back(compressFixedSizeChunk(cctx, data, chunkSize)); | |
} | |
return chunks; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment