Skip to content

Instantly share code, notes, and snippets.

@wolfv
Created January 3, 2023 16:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wolfv/3a5fab163cd73c213168689b7374ae46 to your computer and use it in GitHub Desktop.
Save wolfv/3a5fab163cd73c213168689b7374ae46 to your computer and use it in GitHub Desktop.
Benchmark libarchive .zstd extraction
#include <fcntl.h> /* For O_RDWR */
#include <unistd.h> /* For open(), creat() */

#include <cassert>
#include <cerrno>
#include <cstring>
#include <filesystem>
#include <iostream>
#include <stdexcept>
#include <string>
#include <system_error>

#include <archive.h>
#include <archive_entry.h>
namespace fs = std::filesystem;
/**
 * Copy the data of the entry currently selected in `ar` to the file
 * descriptor `fd`, one libarchive data block at a time.
 *
 * Blocks are written back-to-back in the order they are read; the block
 * offset reported by libarchive is not used, so holes in sparse entries are
 * not reproduced.
 *
 * @param ar  Read handle positioned on an entry (after a header was read).
 * @param fd  Descriptor open for writing.
 * @return ARCHIVE_OK once the end of the entry is reached, the libarchive
 *         status code if reading a block fails, or ARCHIVE_FATAL if writing
 *         to `fd` fails.
 */
static int copy_data_fd(archive* ar, int fd)
{
    const void* buff = nullptr;
    std::size_t size = 0;
    la_int64_t offset = 0;

    while (true)
    {
        const int r = archive_read_data_block(ar, &buff, &size, &offset);
        if (r == ARCHIVE_EOF)
        {
            return ARCHIVE_OK;
        }
        if (r != ARCHIVE_OK)
        {
            // Do not touch `buff`/`size` after a failed read: they are not
            // guaranteed to describe valid data.
            return r;
        }

        // write() may accept fewer bytes than requested, so keep going until
        // the whole block has been handed to the kernel.
        const char* cursor = static_cast<const char*>(buff);
        std::size_t remaining = size;
        while (remaining > 0)
        {
            const ssize_t written = write(fd, cursor, remaining);
            if (written < 0)
            {
                if (errno == EINTR)
                {
                    continue;  // interrupted before any byte was written; retry
                }
                std::cout << "Error writing to file: " << std::strerror(errno) << std::endl;
                return ARCHIVE_FATAL;
            }
            if (written == 0)
            {
                // No progress and no errno: bail out instead of spinning forever.
                std::cout << "Error writing to file: no bytes written" << std::endl;
                return ARCHIVE_FATAL;
            }
            cursor += written;
            remaining -= static_cast<std::size_t>(written);
        }
    }
}
/**
 * Stream the data of the current entry from the read handle `ar` to the
 * write handle `aw`, block by block, passing each block's offset through to
 * the writer.
 *
 * @param ar  Read handle positioned on an entry.
 * @param aw  Write handle whose header for that entry was already written.
 * @return ARCHIVE_OK when the end of the entry is reached; otherwise the
 *         first non-OK libarchive status returned by the reader or the
 *         writer. The corresponding error message is printed to stdout.
 */
static int
copy_data(struct archive *ar, struct archive *aw)
{
    const void *buff = nullptr;
    size_t size = 0;
    int64_t offset = 0;

    for (;;) {
        int r = archive_read_data_block(ar, &buff, &size, &offset);
        if (r == ARCHIVE_EOF)
            return ARCHIVE_OK;
        if (r != ARCHIVE_OK) {
            // archive_error_string() may return NULL; never stream a null pointer.
            const char *msg = archive_error_string(ar);
            std::cout << (msg != nullptr ? msg : "unknown libarchive read error") << std::endl;
            return r;
        }

        r = archive_write_data_block(aw, buff, size, offset);
        if (r != ARCHIVE_OK) {
            // The failure happened on the writer, so that is the handle
            // holding the relevant message.
            const char *msg = archive_error_string(aw);
            std::cout << (msg != nullptr ? msg : "unknown libarchive write error") << std::endl;
            return r;
        }
    }
}
/**
 * Extract every entry of the already-opened read archive `a` into
 * `destination`.
 *
 * The function temporarily changes the process working directory to
 * `destination` (creating it if needed) and restores the previous directory
 * before returning, including when an exception is thrown.
 *
 * Ownership: `a` is closed and freed by this function on every exit path;
 * the caller must not use it afterwards.
 *
 * @param a            Read handle on which an archive has been opened.
 * @param destination  Directory to extract into.
 * @param fast         If true, entry data is written with
 *                     archive_read_data_into_fd() to a descriptor opened on
 *                     the entry's path; otherwise data is copied block by
 *                     block through the disk writer via copy_data().
 * @throws std::runtime_error       on libarchive or file errors.
 * @throws fs::filesystem_error     if the destination cannot be created or
 *                                  entered.
 */
void extract_archive(archive *a, const fs::path &destination, const bool fast)
{
    // Releases the read handle on every exit path.
    struct ReadGuard
    {
        struct archive *handle;
        ~ReadGuard()
        {
            archive_read_close(handle);
            archive_read_free(handle);
        }
    } read_guard{a};

    // Restores the working directory; must not throw from a destructor.
    struct CwdGuard
    {
        fs::path previous;
        ~CwdGuard()
        {
            std::error_code ec;
            fs::current_path(previous, ec);
            if (ec)
            {
                std::cout << "Could not restore working directory: " << ec.message() << std::endl;
            }
        }
    };

    // Releases the disk writer. Declared after the CwdGuard below so that it
    // is destroyed first, i.e. while the process is still in `destination`.
    struct WriteGuard
    {
        struct archive *handle;
        ~WriteGuard()
        {
            archive_write_close(handle);
            archive_write_free(handle);
        }
    };

    // archive_error_string() may return NULL; always produce a usable message.
    const auto error_text = [](struct archive *handle, const char *fallback) -> std::string
    {
        const char *msg = archive_error_string(handle);
        return msg != nullptr ? msg : fallback;
    };

    std::cout << "Going to destination: " << destination << std::endl;

    const fs::path prev_path = fs::current_path();
    if (!fs::exists(destination))
    {
        fs::create_directories(destination);
    }
    fs::current_path(destination);
    CwdGuard cwd_guard{prev_path};

    /* Select which attributes we want to restore. */
    const int flags = ARCHIVE_EXTRACT_TIME
                      // | ARCHIVE_EXTRACT_PERM
                      | ARCHIVE_EXTRACT_SECURE_NODOTDOT
                      | ARCHIVE_EXTRACT_SECURE_SYMLINKS
                      | ARCHIVE_EXTRACT_SECURE_NOABSOLUTEPATHS
                      | ARCHIVE_EXTRACT_SPARSE
                      | ARCHIVE_EXTRACT_UNLINK;

    struct archive *ext = archive_write_disk_new();
    if (ext == nullptr)
    {
        throw std::runtime_error("Extraction: could not allocate the disk writer.");
    }
    WriteGuard write_guard{ext};
    archive_write_disk_set_options(ext, flags);
    archive_write_disk_set_standard_lookup(ext);

    struct archive_entry *entry = nullptr;
    for (;;)
    {
        int r = archive_read_next_header(a, &entry);
        if (r == ARCHIVE_EOF)
        {
            break;
        }
        if (r == ARCHIVE_FATAL)
        {
            // The reader cannot make progress any more; retrying would loop forever.
            throw std::runtime_error(error_text(a, "Extraction: reading the next header failed."));
        }
        if (r < ARCHIVE_OK)
        {
            // Non-fatal problem: report it and move on to the next header.
            std::cout << "libarchive warning: "
                      << error_text(a, "could not read entry header") << std::endl;
            continue;
        }

        r = archive_write_header(ext, entry);
        if (r < ARCHIVE_OK)
        {
            throw std::runtime_error(error_text(ext, "Extraction: writing the header failed."));
        }

        if (archive_entry_size(entry) > 0)
        {
            if (fast)
            {
                // No O_CREAT: this relies on the file already existing
                // (presumably created by archive_write_header above).
                const int fd = open(archive_entry_pathname(entry), O_WRONLY);
                if (fd < 0)
                {
                    throw std::runtime_error(std::string("Error opening file: ")
                                             + std::strerror(errno));
                }
                r = archive_read_data_into_fd(a, fd);
                // close() must run unconditionally (not inside assert, which
                // vanishes under NDEBUG and would leak the descriptor).
                const int close_rc = close(fd);
                if (close_rc != 0 && r >= ARCHIVE_OK)
                {
                    throw std::runtime_error(std::string("Error closing file: ")
                                             + std::strerror(errno));
                }
            }
            else
            {
                r = copy_data(a, ext);
            }

            if (r < ARCHIVE_OK)
            {
                // In fast mode only the reader took part in the transfer, so
                // look at its message first; otherwise prefer the writer's.
                struct archive *first = fast ? a : ext;
                struct archive *second = fast ? ext : a;
                const char *err_str = archive_error_string(first);
                if (err_str == nullptr)
                {
                    err_str = archive_error_string(second);
                }
                if (err_str != nullptr)
                {
                    throw std::runtime_error(err_str);
                }
                throw std::runtime_error("Extraction: writing data was not successful.");
            }
        }

        r = archive_write_finish_entry(ext);
        if (r == ARCHIVE_WARN)
        {
            std::cout << "libarchive warning: "
                      << error_text(ext, "finishing the entry reported a warning") << std::endl;
        }
        else if (r < ARCHIVE_OK)
        {
            throw std::runtime_error(error_text(ext, "Extraction: finishing the entry failed."));
        }
    }
    // Guards run in reverse order: disk writer closed, working directory
    // restored, read handle closed and freed.
}
int main(int argc, char* argv[]) {
if (argc < 2) {
std::cout << "Usage: " << argv[0] << " <archive>" << std::endl;
return 1;
}
auto archive_path = fs::path(argv[1]);
if (!fs::exists(archive_path)) {
std::cout << "Archive does not exist: " << archive_path << std::endl;
return 1;
}
auto archive = archive_read_new();
archive_read_support_filter_all(archive);
archive_read_support_format_all(archive);
auto r = archive_read_open_filename(archive, archive_path.c_str(), 10240);
if (r != ARCHIVE_OK) {
std::cout << "Could not open archive: " << archive_path << std::endl;
return 1;
}
bool fast = false;
if (argc > 2) {
char* fast_char = argv[2];
if (fast_char[0] == 'f') {
fast = true;
}
}
extract_archive(archive, fast ? fs::current_path() / "extracted-fast" : fs::current_path() / "extracted", fast);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment