Skip to content

Instantly share code, notes, and snippets.

@wolfv
Created December 27, 2022 13:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wolfv/5a9a10e06afb6420a0b274a74ff76353 to your computer and use it in GitHub Desktop.
Save wolfv/5a9a10e06afb6420a0b274a74ff76353 to your computer and use it in GitHub Desktop.
Benchmark anaconda repodata downloads with cURL and compression algorithms
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <exception>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>

#include <bzlib.h>
#include <curl/curl.h>
#include <zstd.h>
// Short alias for the standard filesystem namespace.
namespace fs = std::filesystem;
// Size, in bytes, of the scratch buffer each decompressor fills before the
// decompressed data is written to the output file.
constexpr size_t BUFFER_SIZE = 256000;
// Value handed to CURLOPT_VERBOSE for every transfer in this file.
constexpr bool CURL_VERBOSE = false;
/// State shared between successive invocations of stream_decompress_zstd():
/// a zstd decompression context, a scratch output buffer, and the file the
/// decompressed bytes are written to.
struct ZstdStream
{
    /// Creates the decompression context and opens `out_path` for writing.
    ///
    /// @param out_path  File that receives the decompressed data.
    /// @throws std::runtime_error if the context cannot be allocated or
    ///         initialised, or if the output file cannot be opened.
    ZstdStream(const fs::path &out_path)
        : stream(ZSTD_createDCtx()),
          // Binary mode: the decompressed bytes must reach the file unmodified.
          out(std::make_unique<std::ofstream>(out_path, std::ios::binary))
    {
        if (stream == nullptr)
        {
            throw std::runtime_error("ZSTD_createDCtx failed");
        }

        const size_t rc = ZSTD_initDStream(stream);
        const bool init_failed = ZSTD_isError(rc) != 0;
        if (init_failed || !out->is_open())
        {
            // A destructor does not run for an object whose constructor
            // throws, so the context has to be released by hand here.
            ZSTD_freeDCtx(stream);
            throw std::runtime_error(init_failed
                                         ? std::string("ZSTD_initDStream failed: ") + ZSTD_getErrorName(rc)
                                         : std::string("failed to open output file ") + out_path.string());
        }
    }

    // The context is a uniquely-owned handle; copying would free it twice.
    ZstdStream(const ZstdStream &) = delete;
    ZstdStream &operator=(const ZstdStream &) = delete;

    ZSTD_DCtx *stream;                  // decompression context, freed in the destructor
    char buffer[BUFFER_SIZE];           // scratch space for one batch of decompressed output
    std::unique_ptr<std::ofstream> out; // destination of the decompressed data

    ~ZstdStream()
    {
        ZSTD_freeDCtx(stream);
    }
};
int64_t stream_decompress_zstd(void *ptr, int64_t size, int64_t nmemb, ZstdStream *stream)
{
ZSTD_inBuffer in = {ptr, size_t(size * nmemb), 0};
while (in.pos != in.size)
{
ZSTD_outBuffer out = {stream->buffer, BUFFER_SIZE, 0};
const size_t rc = ZSTD_decompressStream(stream->stream, &out, &in);
if (ZSTD_isError(rc))
{
throw std::runtime_error(ZSTD_getErrorName(rc));
}
stream->out->write((const char *)out.dst, out.pos);
}
return size * nmemb;
}
/// State shared between successive invocations of stream_decompress_bzip2():
/// a bzip2 decompression stream, a scratch output buffer, the file the
/// decompressed bytes are written to, and the last bzip2 status code.
struct Bzip2Stream
{
    /// Opens `out_path` for writing and initialises the bzip2 decompressor.
    ///
    /// @param out_path  File that receives the decompressed data.
    /// @throws std::runtime_error if the output file cannot be opened or
    ///         BZ2_bzDecompressInit fails.
    Bzip2Stream(const fs::path &out_path)
        // Value-initialisation zeroes every field, including the
        // bzalloc/bzfree/opaque hooks, without relying on designated
        // initialisers (which are not part of C++17).
        : stream{},
          // Binary mode: the decompressed bytes must reach the file unmodified.
          out(std::make_unique<std::ofstream>(out_path, std::ios::binary)),
          error(BZ_OK)
    {
        if (!out->is_open())
        {
            throw std::runtime_error("failed to open output file " + out_path.string());
        }

        error = BZ2_bzDecompressInit(&stream, 0, 0);
        if (error != BZ_OK)
        {
            throw std::runtime_error("BZ2_bzDecompressInit failed");
        }
    }

    // The bz_stream owns internal state released in the destructor; copying
    // would release it twice.
    Bzip2Stream(const Bzip2Stream &) = delete;
    Bzip2Stream &operator=(const Bzip2Stream &) = delete;

    bz_stream stream;                   // bzip2 decompressor state
    char buffer[BUFFER_SIZE];           // scratch space for one batch of decompressed output
    std::unique_ptr<std::ofstream> out; // destination of the decompressed data
    int error;                          // status of the most recent bzip2 call

    ~Bzip2Stream()
    {
        BZ2_bzDecompressEnd(&stream);
    }
};
int64_t stream_decompress_bzip2(void *ptr, int64_t size, int64_t nmemb, Bzip2Stream *stream)
{
stream->stream.next_in = (char *)ptr;
stream->stream.avail_in = size * nmemb;
while (stream->stream.avail_in && stream->error == BZ_OK)
{
stream->stream.next_out = &stream->buffer[0];
stream->stream.avail_out = BUFFER_SIZE;
stream->error = BZ2_bzDecompress(&stream->stream);
stream->out->write((const char *)stream->buffer, BUFFER_SIZE - stream->stream.avail_out);
}
if (stream->error != BZ_OK && stream->error != BZ_STREAM_END)
{
throw std::runtime_error("BZ2_bzDecompress failed " + std::to_string(stream->error));
}
return size * nmemb;
}
void curl_zstd()
{
std::cout << "curl_zstd" << std::endl;
CURL *curl;
CURLcode res;
const char *url = "https://conda.anaconda.org/conda-forge/linux-64/repodata.json.zst";
const char outfilename[FILENAME_MAX] = "./out_zst.json";
curl = curl_easy_init();
ZstdStream stream(outfilename);
if (curl)
{
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
curl_easy_setopt(curl, CURLOPT_VERBOSE, CURL_VERBOSE);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, stream_decompress_zstd);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &stream);
res = curl_easy_perform(curl);
/* always cleanup */
curl_easy_cleanup(curl);
}
}
void curl_bz2()
{
std::cout << "curl_bz2" << std::endl;
CURL *curl;
CURLcode res;
const char *url = "https://conda.anaconda.org/conda-forge/linux-64/repodata.json.bz2";
const char outfilename[FILENAME_MAX] = "./out_bz2.json";
curl = curl_easy_init();
Bzip2Stream stream(outfilename);
if (curl)
{
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
curl_easy_setopt(curl, CURLOPT_VERBOSE, CURL_VERBOSE);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, stream_decompress_bzip2);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &stream);
res = curl_easy_perform(curl);
/* always cleanup */
curl_easy_cleanup(curl);
}
}
/// Write callback that forwards a received chunk to an output stream.
///
/// @tparam T      Output stream type providing `write()` and `fail()`.
/// @param buffer  Start of the received data.
/// @param size    Size of one element, in bytes.
/// @param nitems  Number of elements.
/// @param stream  Destination stream.
/// @return `size * nitems` on success, or 0 when the stream is in a failed
///         state after the write, so the caller sees a count that differs
///         from the one it passed in.
template <class T>
std::size_t ostream_callback(char* buffer, std::size_t size, std::size_t nitems, T* stream)
{
    const std::size_t total = size * nitems;
    stream->write(buffer, static_cast<std::streamsize>(total));
    // Do not claim the bytes were handled if the stream rejected them.
    return stream->fail() ? 0 : total;
}
void curl_gzip()
{
std::cout << "curl_bz2" << std::endl;
CURL *curl;
CURLcode res;
const char *url = "https://conda.anaconda.org/conda-forge/linux-64/repodata.json";
const char outfilename[FILENAME_MAX] = "./out_gzip.json";
curl = curl_easy_init();
std::ofstream stream(outfilename);
if (curl)
{
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
curl_easy_setopt(curl, CURLOPT_VERBOSE, CURL_VERBOSE);
curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "");
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, ostream_callback<std::ofstream>);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &stream);
res = curl_easy_perform(curl);
/* always cleanup */
curl_easy_cleanup(curl);
}
}
int main()
{
std::chrono::system_clock::time_point start, end;
start = std::chrono::system_clock::now();
curl_zstd();
end = std::chrono::system_clock::now();
double elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
std::cout << "curl_zstd: " << elapsed << "ms" << std::endl;
start = std::chrono::system_clock::now();
curl_bz2();
end = std::chrono::system_clock::now();
elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
std::cout << "curl_bz2: " << elapsed << "ms" << std::endl;
start = std::chrono::system_clock::now();
curl_gzip();
end = std::chrono::system_clock::now();
elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
std::cout << "curl_gzip: " << elapsed << "ms" << std::endl;
return 0;
}
curl_zstd: 3861ms
curl_bz2: 3946ms
curl_gzip: 5708ms
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment