Skip to content

Instantly share code, notes, and snippets.

@wolfv
Last active December 27, 2022 13:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wolfv/f322bd9fb0df48e9daafb7453cfc7379 to your computer and use it in GitHub Desktop.
Save wolfv/f322bd9fb0df48e9daafb7453cfc7379 to your computer and use it in GitHub Desktop.
Stream decompress a simple `zstd` or `bzip2` compressed file with cURL / libcurl
#include <exception>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>

#include <bzlib.h>
#include <curl/curl.h>
#include <zstd.h>
// Short alias for the standard filesystem namespace.
namespace fs = std::filesystem;

// Capacity, in bytes, of the scratch buffer each stream object uses to hold
// decompressed output before it is written to the destination file.
// An alternative, larger capacity is left disabled below:
// constexpr size_t BUFFER_SIZE = 131072;
constexpr size_t BUFFER_SIZE{1024};
// State for streaming zstd decompression: the zstd decompression context, a
// scratch buffer for decoded bytes, and the output file the bytes go to.
// A pointer to this object is what the zstd write callback receives.
struct ZstdStream
{
    // Creates the decompression context and opens `out_path` for writing
    // (truncating it). The file is opened in binary mode so decoded bytes are
    // written verbatim on every platform.
    //
    // Throws std::runtime_error if the context cannot be created or
    // initialised, or if the output file cannot be opened.
    ZstdStream(const fs::path &out_path) : stream(ZSTD_createDCtx()),
                                           out(std::make_unique<std::ofstream>(out_path, std::ios::binary))
    {
        if (stream == nullptr)
        {
            throw std::runtime_error("ZSTD_createDCtx failed");
        }
        // The destructor does not run when a constructor throws, so the
        // context has to be released by hand on the failure paths below.
        if (!out->is_open())
        {
            ZSTD_freeDCtx(stream);
            throw std::runtime_error("cannot open output file " + out_path.string());
        }
        const size_t rc = ZSTD_initDStream(stream);
        if (ZSTD_isError(rc))
        {
            const std::string reason = ZSTD_getErrorName(rc);
            ZSTD_freeDCtx(stream);
            throw std::runtime_error("ZSTD_initDStream failed: " + reason);
        }
    }

    // The context is owned through a raw pointer; a copy would free it twice.
    ZstdStream(const ZstdStream &) = delete;
    ZstdStream &operator=(const ZstdStream &) = delete;

    ZSTD_DCtx *stream;                  // owned decompression context
    char buffer[BUFFER_SIZE];           // scratch space for decoded output
    std::unique_ptr<std::ofstream> out; // destination file

    ~ZstdStream()
    {
        ZSTD_freeDCtx(stream);
    }
};
int64_t stream_decompress_zstd(void *ptr, int64_t size, int64_t nmemb, ZstdStream *stream)
{
ZSTD_inBuffer in = {ptr, size_t(size * nmemb), 0};
while (in.pos != in.size)
{
ZSTD_outBuffer out = {stream->buffer, BUFFER_SIZE, 0};
const size_t rc = ZSTD_decompressStream(stream->stream, &out, &in);
if (ZSTD_isError(rc))
{
throw std::runtime_error(ZSTD_getErrorName(rc));
}
stream->out->write((const char *)out.dst, out.pos);
}
return size * nmemb;
}
struct Bzip2Stream
{
Bzip2Stream(const fs::path &out_path) : stream{.bzalloc = nullptr, .bzfree = nullptr, .opaque = nullptr},
out(std::make_unique<std::ofstream>(out_path))
{
error = BZ2_bzDecompressInit(&stream, 0, false);
if (error != BZ_OK)
{
throw std::runtime_error("BZ2_bzDecompressInit failed");
}
}
bz_stream stream;
char buffer[BUFFER_SIZE];
std::unique_ptr<std::ofstream> out;
int error;
~Bzip2Stream()
{
BZ2_bzDecompressEnd(&stream);
}
};
int64_t stream_decompress_bzip2(void *ptr, int64_t size, int64_t nmemb, Bzip2Stream *stream)
{
stream->stream.next_in = (char *)ptr;
stream->stream.avail_in = size * nmemb;
while (stream->stream.avail_in && stream->error == BZ_OK)
{
stream->stream.next_out = &stream->buffer[0];
stream->stream.avail_out = BUFFER_SIZE;
stream->error = BZ2_bzDecompress(&stream->stream);
stream->out->write((const char *)stream->buffer, BUFFER_SIZE - stream->stream.avail_out);
}
if (stream->error != BZ_OK && stream->error != BZ_STREAM_END)
{
throw std::runtime_error("BZ2_bzDecompress failed " + std::to_string(stream->error));
}
return size * nmemb;
}
void curl_zstd()
{
std::cout << "curl_zstd" << std::endl;
CURL *curl;
CURLcode res;
const char *url = "https://conda.anaconda.org/conda-forge/linux-64/repodata.json.zst";
const char outfilename[FILENAME_MAX] = "./out_zst.json";
curl = curl_easy_init();
ZstdStream stream(outfilename);
if (curl)
{
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, stream_decompress_zstd);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &stream);
res = curl_easy_perform(curl);
/* always cleanup */
curl_easy_cleanup(curl);
}
}
void curl_bz2()
{
std::cout << "curl_bz2" << std::endl;
CURL *curl;
CURLcode res;
const char *url = "https://conda.anaconda.org/conda-forge/linux-64/repodata.json.bz2";
const char outfilename[FILENAME_MAX] = "./out_bz2.json";
curl = curl_easy_init();
Bzip2Stream stream(outfilename);
if (curl)
{
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, stream_decompress_bzip2);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &stream);
res = curl_easy_perform(curl);
/* always cleanup */
curl_easy_cleanup(curl);
}
}
int main()
{
curl_zstd();
curl_bz2();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment