simonhf/_libarchive-read-blocking.md

## _libarchive-read-blocking.md

      
    Raw
  

              _libarchive-read-blocking.md
            
          
    Experiments with libarchive read blocking: Part 1

Disclaimer: Don't know much about libarchive... yet!
Step 1: Describe the issue


When reading a streamed archive using archive_read_open() [1] and archive_read_extract() [2], a callback is called one or more times to read chunks of the archive.
This creates an issue if (a) your program needs to wait for the next chunk to arrive, and/or (b) you want to process multiple archive streams in the same thread.
Effectively, archive_read_open() [1] and archive_read_extract() [2] block until the necessary number of archive stream chunks have been read via the callback.

[1] archive_read_open
[2] archive_read_extract
Step 2: Surf around looking for others who have come up against the issue


libarchive-discuss: archive_read_callback and streaming bytes read off the network

Step 3: Download and build libarchive

$ git clone https://github.com/libarchive/libarchive.git
$ cd libarchive/
$ /bin/sh build/autogen.sh
$ ./configure
$ make

Step 4: Create an example using the read callback


Use the source code from this github issue [1] and copy it to example.cpp.
Note: The name of the output archive is hard-coded as test.tar.gz.
Try building it:

$ g++ -std=c++11 -O2 -Ilibarchive -L.libs -o example example.cpp -larchive
$ ./example
./example{-r | -w} file[s]
$ ./example -w example.cpp
Compressing -w...
Compressing example.cpp...
$ ls -al test.tar.gz
-rw-rw-r-- 1 simon simon 1406 Oct 16 11:38 test.tar.gz
$ ./example -r example.cpp
Attempting to open example.cpp
calling archive_read_open_memory..

[1] github issue 1011
Step 5: Hack the code to reproduce the issue

Step 5a: Create a version of the code which attempts to read and extract from two archive streams concurrently:


Note: The callback business logic is hacked so that the first three read callbacks for an archive only ever return 7 bytes. This helps draw attention to the blocking nature of archive_read_open().

$ cp example.cpp example2.cpp
$ # hack the code

Step 5b: Create two archive streams:

$ cp .libs/libarchive.a    .libs/libarchive_test_1a.a
$ cp .libs/libarchive.a    .libs/libarchive_test_2b.a
$ cp .libs/libarchive_fe.a .libs/libarchive_test_1b.a
$ cp .libs/libarchive_fe.a .libs/libarchive_test_2a.a
$ ls -al .libs/libarchive*.a
-rw-rw-r-- 1 simon simon 6130184 Oct 17 15:50 .libs/libarchive.a
-rw-rw-r-- 1 simon simon   51168 Oct 17 15:50 .libs/libarchive_fe.a
-rw-rw-r-- 1 simon simon 6130184 Oct 17 15:50 .libs/libarchive_test_1a.a
-rw-rw-r-- 1 simon simon   51168 Oct 17 15:51 .libs/libarchive_test_1b.a
-rw-rw-r-- 1 simon simon   51168 Oct 17 15:51 .libs/libarchive_test_2a.a
-rw-rw-r-- 1 simon simon 6130184 Oct 17 15:51 .libs/libarchive_test_2b.a
$ g++ -DFROM_STREAM -std=c++11 -O2 -Ilibarchive -L.libs -o example2 example2.cpp -larchive && ./example2 -w .libs/libarchive_test_1*.a ; cp test.tar.gz test_1.tar.gz
- compressing: .libs/libarchive_test_1a.a
- archive_entry_new()
- read() = 2097152
- read() = 2097152
- read() = 1935880
- read() = 0
- compressing: .libs/libarchive_test_1b.a
- archive_entry_clear()
- read() = 51168
- read() = 0
$ g++ -DFROM_STREAM -std=c++11 -O2 -Ilibarchive -L.libs -o example2 example2.cpp -larchive && ./example2 -w .libs/libarchive_test_2*.a ; cp test.tar.gz test_2.tar.gz
- compressing: .libs/libarchive_test_2a.a
- archive_entry_new()
- read() = 51168
- read() = 0
- compressing: .libs/libarchive_test_2b.a
- archive_entry_clear()
- read() = 2097152
- read() = 2097152
- read() = 1935880
- read() = 0
$ ls -al test_*.tar.gz
-rw-rw-r-- 1 simon simon 1689852 Oct 17 15:54 test_1.tar.gz
-rw-rw-r-- 1 simon simon 1689589 Oct 17 15:55 test_2.tar.gz
$ tar -ztvf test_1.tar.gz
---------- 0/0         6130184 1969-12-31 16:00 .libs/libarchive_test_1a.a
---------- 0/0           51168 1969-12-31 16:00 .libs/libarchive_test_1b.a
$ tar -ztvf test_2.tar.gz
---------- 0/0           51168 1969-12-31 16:00 .libs/libarchive_test_2a.a
---------- 0/0         6130184 1969-12-31 16:00 .libs/libarchive_test_2b.a

Step 5c: Extract the first archive stream individually:

     - /* for each archive file: read off disk, and archive_read_open() */
0=id - attempting to open: test_1.tar.gz
0=id - read 1,689,852 bytes into id_buff_archive[0]
0=id - archive_read_new() {}
0=id - archive_read_open() {
0=id   - libarchiveRead() {} = 7=bytes_available // callback
0=id   - libarchiveRead() {} = 7=bytes_available // callback
0=id   - libarchiveRead() {} = 7=bytes_available // callback
0=id   - libarchiveRead() {} = 262,144=bytes_available // callback
0=id   } = ARCHIVE_OK
     - /* for each archive file: archive_read_next_header() and archive_read_extract() */
0=id - archive_read_next_header() {} = ARCHIVE_OK
0=id - archive_entry_pathname(entry) {} = .libs/libarchive_test_1a.a // file to extract
0=id - archive_read_extract() {
0=id   - libarchiveRead() {} = 262,144=bytes_available // callback
0=id   - libarchiveRead() {} = 262,144=bytes_available // callback
0=id   - libarchiveRead() {} = 262,144=bytes_available // callback
0=id   - libarchiveRead() {} = 262,144=bytes_available // callback
0=id   - libarchiveRead() {} = 262,144=bytes_available // callback
0=id   - libarchiveRead() {} = 116,967=bytes_available // callback
0=id   } = ARCHIVE_OK
     - /* for each archive file: archive_read_next_header() and archive_read_extract() */
0=id - archive_read_next_header() {} = ARCHIVE_OK
0=id - archive_entry_pathname(entry) {} = .libs/libarchive_test_1b.a // file to extract
0=id - archive_read_extract() {
0=id   - libarchiveRead() {} = 0=bytes_available // callback
0=id   } = ARCHIVE_OK
     - /* for each archive file: archive_read_next_header() and archive_read_extract() */
0=id - archive_read_next_header() {} = ARCHIVE_EOF
     - /* for each archive file: archive_read_close() and archive_read_free() */
0=id - archive_read_close() {}
0=id - archive_read_free() {}

Step 5d: Extract the second archive stream individually:

     - /* for each archive file: read off disk, and archive_read_open() */
0=id - attempting to open: test_2.tar.gz
0=id - read 1,689,589 bytes into id_buff_archive[0]
0=id - archive_read_new() {}
0=id - archive_read_open() {
0=id   - libarchiveRead() {} = 7=bytes_available // callback
0=id   - libarchiveRead() {} = 7=bytes_available // callback
0=id   - libarchiveRead() {} = 7=bytes_available // callback
0=id   - libarchiveRead() {} = 262,144=bytes_available // callback
0=id   } = ARCHIVE_OK
     - /* for each archive file: archive_read_next_header() and archive_read_extract() */
0=id - archive_read_next_header() {} = ARCHIVE_OK
0=id - archive_entry_pathname(entry) {} = .libs/libarchive_test_2a.a // file to extract
0=id - archive_read_extract() {
0=id   } = ARCHIVE_OK
     - /* for each archive file: archive_read_next_header() and archive_read_extract() */
0=id - archive_read_next_header() {} = ARCHIVE_OK
0=id - archive_entry_pathname(entry) {} = .libs/libarchive_test_2b.a // file to extract
0=id - archive_read_extract() {
0=id   - libarchiveRead() {} = 262,144=bytes_available // callback
0=id   - libarchiveRead() {} = 262,144=bytes_available // callback
0=id   - libarchiveRead() {} = 262,144=bytes_available // callback
0=id   - libarchiveRead() {} = 262,144=bytes_available // callback
0=id   - libarchiveRead() {} = 262,144=bytes_available // callback
0=id   - libarchiveRead() {} = 116,704=bytes_available // callback
0=id   - libarchiveRead() {} = 0=bytes_available // callback
0=id   } = ARCHIVE_OK
     - /* for each archive file: archive_read_next_header() and archive_read_extract() */
0=id - archive_read_next_header() {} = ARCHIVE_EOF
     - /* for each archive file: archive_read_close() and archive_read_free() */
0=id - archive_read_close() {}
0=id - archive_read_free() {}

Step 5e: Attempt to extract both archive streams concurrently:


Note: Lines starting 0=id are dealing with test_1.tar.gz, and lines starting 1=id are dealing with test_2.tar.gz.
Note: We can see that once we start with the callbacks for a particular archive, there's no way to pause execution for the archive (and continue with the other archive).
Note: Once started, the callbacks for an archive are called repeatedly, one after the other, until enough chunks of the archive have been read to extract the next file within the archive.
Note: In the example below, where each archive contains a longer and a shorter archived file, we can see how the longer file causes many callbacks in succession.
Note: If each callback presented only another e.g. 1,500 bytes (instead of 256 KB) to libarchive, then there would be very many more callbacks, and delay if waiting for the packets from the network.

$ g++ -DFROM_STREAM -std=c++11 -O2 -Ilibarchive -L.libs -o example2 example2.cpp -larchive && ./example2 -r test_1.tar.gz test_2.tar.gz
     - /* for each archive file: read off disk, and archive_read_open() */
0=id - attempting to open: test_1.tar.gz
0=id - read 1,689,852 bytes into id_buff_archive[0]
0=id - archive_read_new() {}
0=id - archive_read_open() {
0=id   - libarchiveRead() {} = 7=bytes_available // callback
0=id   - libarchiveRead() {} = 7=bytes_available // callback
0=id   - libarchiveRead() {} = 7=bytes_available // callback
0=id   - libarchiveRead() {} = 262,144=bytes_available // callback
0=id   } = ARCHIVE_OK
1=id - attempting to open: test_2.tar.gz
1=id - read 1,689,589 bytes into id_buff_archive[1]
1=id - archive_read_new() {}
1=id - archive_read_open() {
1=id   - libarchiveRead() {} = 7=bytes_available // callback
1=id   - libarchiveRead() {} = 7=bytes_available // callback
1=id   - libarchiveRead() {} = 7=bytes_available // callback
1=id   - libarchiveRead() {} = 262,144=bytes_available // callback
1=id   } = ARCHIVE_OK
     - /* for each archive file: archive_read_next_header() and archive_read_extract() */
0=id - archive_read_next_header() {} = ARCHIVE_OK
0=id - archive_entry_pathname(entry) {} = .libs/libarchive_test_1a.a // file to extract
0=id - archive_read_extract() {
0=id   - libarchiveRead() {} = 262,144=bytes_available // callback
0=id   - libarchiveRead() {} = 262,144=bytes_available // callback
0=id   - libarchiveRead() {} = 262,144=bytes_available // callback
0=id   - libarchiveRead() {} = 262,144=bytes_available // callback
0=id   - libarchiveRead() {} = 262,144=bytes_available // callback
0=id   - libarchiveRead() {} = 116,967=bytes_available // callback
0=id   } = ARCHIVE_OK
1=id - archive_read_next_header() {} = ARCHIVE_OK
1=id - archive_entry_pathname(entry) {} = .libs/libarchive_test_2a.a // file to extract
1=id - archive_read_extract() {
1=id   } = ARCHIVE_OK
     - /* for each archive file: archive_read_next_header() and archive_read_extract() */
0=id - archive_read_next_header() {} = ARCHIVE_OK
0=id - archive_entry_pathname(entry) {} = .libs/libarchive_test_1b.a // file to extract
0=id - archive_read_extract() {
0=id   - libarchiveRead() {} = 0=bytes_available // callback
0=id   } = ARCHIVE_OK
1=id - archive_read_next_header() {} = ARCHIVE_OK
1=id - archive_entry_pathname(entry) {} = .libs/libarchive_test_2b.a // file to extract
1=id - archive_read_extract() {
1=id   - libarchiveRead() {} = 262,144=bytes_available // callback
1=id   - libarchiveRead() {} = 262,144=bytes_available // callback
1=id   - libarchiveRead() {} = 262,144=bytes_available // callback
1=id   - libarchiveRead() {} = 262,144=bytes_available // callback
1=id   - libarchiveRead() {} = 262,144=bytes_available // callback
1=id   - libarchiveRead() {} = 116,704=bytes_available // callback
1=id   - libarchiveRead() {} = 0=bytes_available // callback
1=id   } = ARCHIVE_OK
     - /* for each archive file: archive_read_next_header() and archive_read_extract() */
0=id - archive_read_next_header() {} = ARCHIVE_EOF
1=id - archive_read_next_header() {} = ARCHIVE_EOF
     - /* for each archive file: archive_read_close() and archive_read_free() */
0=id - archive_read_close() {}
0=id - archive_read_free() {}
1=id - archive_read_close() {}
1=id - archive_read_free() {}

Step 6: Brainstorm ideas to change libarchive so it does not block on read


Note: Ideally an API change would maintain backwards compatibility.
Note: Ideally an API change would not expand the already enormous and complicated API.

Idea #1: Introduce new ARCHIVE_WOULDBLOCK return value


Note: This idea is born just from examining the current API, without examining libarchive internals.
Note: In theory this change would be the lowest touch to the libarchive documentation?
Note: This would potentially double the number of archive_read_callback() calls? Why? The first would always offer raw bytes, while the second would always return ARCHIVE_WOULDBLOCK, and so on. The more complicated idea #2 attempts to mitigate performance issues, but could end up being over complicated?
Introduce ARCHIVE_WOULDBLOCK as an extra return value along with ARCHIVE_OK [1] et al.
archive_read_open() [2] can be called repeatedly if ARCHIVE_WOULDBLOCK is returned.
archive_read_extract() [3] can be called repeatedly if ARCHIVE_WOULDBLOCK is returned.
archive_read_callback() can return ARCHIVE_WOULDBLOCK if there are currently no raw bytes to pass to libarchive.
archive_read_next_header() [4] will return ARCHIVE_WOULDBLOCK if archive_read_open() or archive_read_extract() have not been fed enough raw bytes.

[1] ARCHIVE_OK
[2] archive_read_open()
[3] archive_read_extract()
[4] archive_read_next_header()
Idea #2: Like idea #1 with new archive_read_offer_bytes() function instead of archive_read_callback()


Note: This idea is born just from examining the current API, without examining libarchive internals.
Note: This idea makes the API even more complicated but saves a callback at run-time; likely relatively small performance savings.
Note: archive_read_open() actually has 4 possible callbacks, so spinning one out into archive_read_offer_bytes() might end up really over complicating the API?
archive_read_open() would be given NULL as the address for the callback archive_read_callback(), and will therefore always return ARCHIVE_WOULDBLOCK.
If archive_read_open() or archive_read_extract() or archive_read_next_header() return ARCHIVE_WOULDBLOCK, then the new archive_read_offer_bytes() function should be called (which acts as a substitute for archive_read_callback()).
The new archive_read_offer_bytes() function returns what archive_read_open() or archive_read_extract() would have returned, including ARCHIVE_WOULDBLOCK if archive_read_offer_bytes() needs to be called again in the future.


## example.cpp
// libstd
#include <iostream>
#include <vector>
#include <string>
#include <unistd.h>
#include <fcntl.h>

// libarchive
#include <archive.h>
#include <archive_entry.h>

/// Client data passed to the libarchive read callback: a view of the archive
/// bytes that main() has already loaded into memory.
struct mydata {
    char*      data;  ///< first byte of the buffered archive (not owned)
    la_ssize_t size;  ///< number of valid bytes starting at `data`
};

/**
 * libarchive read callback that serves the whole in-memory archive as a
 * single block.
 *
 * The first call hands out the complete buffer described by the `mydata`
 * client data. The buffer is then marked as consumed, so every later call
 * returns 0, which is how a read callback signals end-of-stream. Without this
 * the same bytes would be handed to libarchive again on every call and it
 * would never see the end of the input.
 *
 * @param a            archive being read (unused)
 * @param client_data  pointer to the `mydata` registered with archive_read_open()
 * @param block        out: start of the bytes made available by this call
 * @return number of bytes available at `*block`, 0 at end-of-stream, or the
 *         negative value stored in `size` if the caller recorded a read error
 */
la_ssize_t libarchiveRead(struct archive* a, void* client_data, const void** block)
{
    (void)a; // not needed: all state lives in client_data

    std::cout << "calling custom read().." << std::endl;
    mydata *current_data = static_cast<mydata*>(client_data);

    *block = current_data->data;
    const la_ssize_t bytes_available = current_data->size;

    // Everything has now been handed out; report EOF from the next call on.
    current_data->size = 0;

    return bytes_available;
}

int main(int argc, char** argv) {

    if(argc < 3)
    {
        std::cout << argv[0] << "{-r | -w} file[s]" << std::endl;
        return 1;
    }

    std::vector<std::string> filenames;
    filenames.reserve(argc);

    while (*++argv != nullptr)
    {
        filenames.emplace_back(*argv);
    }

    bool modeRead = (filenames[0] == "-r");

    // archive related variables
    struct archive *archive;

    char buff_archive[1024 * 1024];

    // archive entry/file variables
    struct archive_entry *entry;
    struct stat st;

    char buff_entry[1024 * 1024];
    int len;
    int fd;

    if(modeRead)
    {
        std::cout << "Attempting to open " << filenames[1] << std::endl;

        fd = open(filenames[1].c_str(), O_RDONLY);
        len = read(fd, buff_archive, sizeof(buff_archive));

        archive = archive_read_new();
        archive_read_support_format_all(archive);
        archive_read_support_filter_all(archive);

        #ifdef FROM_STREAM

            mydata *client_data = new mydata();

            client_data->data = buff_archive;
            client_data->size = len;

            std::cout << "calling archive_read_open.." << std::endl;
            int res = archive_read_open(archive,
                                        client_data,
                                        nullptr,
                                        (archive_read_callback*)libarchiveRead,
                                        nullptr);

        #else
            std::cout << "calling archive_read_open_memory.." << std::endl;
            int res = archive_read_open_memory(archive, buff_archive, len);

        #endif

        int flags = 0;
        flags = ARCHIVE_EXTRACT_TIME;
        flags |= ARCHIVE_EXTRACT_PERM;
        flags |= ARCHIVE_EXTRACT_ACL;
        flags |= ARCHIVE_EXTRACT_FFLAGS;

        res = archive_read_next_header(archive, &(entry));

        while(res == ARCHIVE_OK) {
            std::cout << "Extracting " << archive_entry_pathname(entry) << "..." << std::endl;
            // extract current entry
            archive_read_extract(archive, entry, flags);
            // read next if available
            res = archive_read_next_header(archive, &(entry));
        }

        archive_read_close(archive);
        archive_read_free(archive);
        close(fd);
    }
    else
    {
        archive = archive_write_new(); // initializes a new archive
        archive_write_add_filter_gzip(archive); // gzips incoming data
        archive_write_set_format_pax_restricted(archive); // only use extensions when necessary
        archive_write_set_bytes_per_block(archive, 8192);
        archive_write_open_filename(archive, "test.tar.gz");

        for (auto const& value: filenames){
            std::cout << "Compressing " << value << "...\n";
            stat(value.c_str(), &st);
            if(entry == nullptr){
                entry = archive_entry_new();
            }
            else
            {
                archive_entry_clear(entry);
            }

            // the following attributes are mandatory
            archive_entry_set_pathname(entry, value.c_str());
            archive_entry_set_size(entry, st.st_size);
            archive_entry_set_filetype(entry, st.st_mode);

            archive_write_header(archive, entry);

            fd = open(value.c_str(), O_RDONLY);
            len = read(fd, buff_entry, sizeof(buff_entry));
            while ( len > 0 ) {
                archive_write_data(archive, buff_entry, len);
                len = read(fd, buff_entry, sizeof(buff_entry));
            }
            close(fd);
        }

        // done looping over files
        archive_entry_free(entry);

        archive_write_close(archive);
        archive_write_free(archive);
    }

    return 0;

}

## example2.cpp
// libstd
#include <iostream>
#include <vector>
#include <string>
#include <unistd.h>
#include <fcntl.h>

// libarchive
#include <archive.h>
#include <archive_entry.h>

// Converts between an integer and a pointer by going through uintptr_t; used
// to carry the archive id inside libarchive's void* client_data.
#define CAST(TYPE, PTR) ((TYPE)(uintptr_t)(PTR))

// Size in bytes of each per-archive in-memory buffer (2 MiB).
#define BUFF_ARCHIVE_LEN (2 * 1024 * 1024)

// Number of slots in each of the id_* arrays below.
#define ID_MAX (2)

// Per-archive state, indexed by archive id.
int    id_buff_archive_used[ID_MAX];              // bytes read from disk into id_buff_archive[id]
char   id_buff_archive[ID_MAX][BUFF_ARCHIVE_LEN]; // raw archive bytes as read from disk
char * id_chunk_addr[ID_MAX];                     // next byte the read callback will hand out
int    id_chunk_left[ID_MAX];                     // bytes the read callback has not handed out yet
int    id_chunk_size[ID_MAX];                     // upper bound on bytes handed out per callback
int    id_chunk_num [ID_MAX];                     // number of callbacks made so far
int    id_chunk_todo[ID_MAX];                     // 1 while the archive still has headers to read, 0 after EOF

// libarchive handle and current entry for each archive id (write mode uses slot 0).
struct archive       * id_archive[ID_MAX];
struct archive_entry * id_entry[ID_MAX] = {nullptr};

/**
 * libarchive read callback that hands out the buffered archive for one id in
 * chunks and logs each call.
 *
 * Each call offers at most id_chunk_size[id] bytes, starting at
 * id_chunk_addr[id]. As a deliberate experiment, the first three calls offer
 * only 7 bytes (while more than 123 bytes remain), so that
 * archive_read_open() is forced to call back several times.
 *
 * @param a            archive being read (unused)
 * @param client_data  archive id packed into a pointer with CAST()
 * @param chunk        out: start of the bytes offered by this call
 * @return number of bytes offered, or 0 once the buffer is exhausted; a
 *         negative id_chunk_left[id] is returned unchanged
 */
la_ssize_t libarchiveRead(struct archive* a, void* client_data, const void** chunk)
{
    (void)a; // all state is looked up through the id

    const int id = CAST(int, client_data);
    *chunk = id_chunk_addr[id];

    la_ssize_t bytes_available = id_chunk_size[id];
    if (id_chunk_left[id] < id_chunk_size[id]) {
        bytes_available = id_chunk_left[id];
    }
    if ((id_chunk_num[id] < 3) && (id_chunk_left[id] > 123)) { // special business logic to cause archive_read_open() to call this callback multiple times
        bytes_available = 7;
    }

    // Only move the cursor forward for real data; a negative count must not
    // drag id_chunk_addr to before the start of the buffer.
    if (bytes_available > 0) {
        id_chunk_left[id] -= bytes_available;
        id_chunk_addr[id] += bytes_available;
    }
    id_chunk_num [id] ++;

    // la_ssize_t is not guaranteed to be long, so cast to match %ld.
    printf("%d=id   - libarchiveRead() {} = %'ld=bytes_available // callback\n", id, static_cast<long>(bytes_available));
    return bytes_available;
}

int main(int argc, char** argv) {
    setlocale(LC_NUMERIC, "");

    if(argc < 3)
    {
        std::cout << argv[0] << "{-r | -w} file[s]" << std::endl;
        return 1;
    }

    std::string mode = *++argv; // "-r" or "-w"
    std::vector<std::string> filenames;
    filenames.reserve(argc);

    while (*++argv != nullptr) {
        filenames.emplace_back(*argv);
    }

    // archive entry/file variables
    struct stat st;

    char buff_entry[2 * 1024 * 1024];
    int fd;

    if("-r" == mode /* read mode */) {
        printf("     - /* for each archive file: read off disk, and archive_read_open() */\n");

        int id_max = filenames.size();
        for (int id = 0; id < id_max; id ++) {
            printf("%d=id - attempting to open: %s\n", id, filenames[id].c_str());

            int fd = open(filenames[id].c_str(), O_RDONLY);
            id_buff_archive_used[id] = read(fd, &id_buff_archive[id][0], sizeof(id_buff_archive[id]));
            printf("%d=id - read %'d bytes into id_buff_archive[%d]\n", id, id_buff_archive_used[id], id);
            close(fd);

            printf("%d=id - archive_read_new() {}\n", id);
            id_archive[id] = archive_read_new();
            archive_read_support_format_all(id_archive[id]);
            archive_read_support_filter_all(id_archive[id]);

            #ifdef FROM_STREAM

            id_chunk_addr[id] = &id_buff_archive[id][0];
            id_chunk_left[id] =  id_buff_archive_used[id];
            id_chunk_size[id] = 256 * 1024;
            id_chunk_num [id] = 0;
            id_chunk_todo[id] = 1;

            printf("%d=id - archive_read_open() {\n", id);
            int res = archive_read_open(id_archive[id], CAST(void *, id), nullptr, (archive_read_callback*)libarchiveRead, nullptr);
            printf("%d=id   } = %s\n", id,
                res == ARCHIVE_OK  ? "ARCHIVE_OK"  :
                res == ARCHIVE_EOF ? "ARCHIVE_EOF" : "ERROR: UNKNOWN");

            #else

            std::cout << "calling archive_read_open_memory.." << std::endl;
            int res = archive_read_open_memory(id_archive[id], &id_buff_archive[id][0], id_buff_archive_used[id]);

            #endif
        }

        int flags = 0;
        flags  = ARCHIVE_EXTRACT_TIME;
        flags |= ARCHIVE_EXTRACT_PERM;
        flags |= ARCHIVE_EXTRACT_ACL;
        flags |= ARCHIVE_EXTRACT_FFLAGS;

        int filenames_unpacked = 0;
        while (filenames_unpacked < id_max) {
            printf("     - /* for each archive file: archive_read_next_header() and archive_read_extract() */\n");
            for (int id = 0; id < id_max; id ++) {
                if (id_chunk_todo[id]) {
                    int res = archive_read_next_header(id_archive[id], &(id_entry[id]));
                    printf("%d=id - archive_read_next_header() {} = %s\n", id,
                        res == ARCHIVE_OK  ? "ARCHIVE_OK"  :
                        res == ARCHIVE_EOF ? "ARCHIVE_EOF" : "ERROR: UNKNOWN");

                    if (ARCHIVE_EOF == res) {
                        id_chunk_todo[id] = 0;
                        filenames_unpacked ++;
                    }
                    else {
                        printf("%d=id - archive_entry_pathname(entry) {} = %s // file to extract\n", id, archive_entry_pathname(id_entry[id]));

                        printf("%d=id - archive_read_extract() {\n", id);
                        res = archive_read_extract(id_archive[id], id_entry[id], flags);
                        printf("%d=id   } = %s\n", id,
                            res == ARCHIVE_OK  ? "ARCHIVE_OK"  :
                            res == ARCHIVE_EOF ? "ARCHIVE_EOF" : "ERROR: UNKNOWN");
                    }
                }
            }
        }

        printf("     - /* for each archive file: archive_read_close() and archive_read_free() */\n");

        for (int id = 0; id < id_max; id ++) {
            printf("%d=id - archive_read_close() {}\n", id);
            archive_read_close(id_archive[id]);

            printf("%d=id - archive_read_free() {}\n", id);
            archive_read_free(id_archive[id]);
        }
    }
    else {
        int id = 0;

        id_archive[id] = archive_write_new(); // initializes a new archive
        archive_write_add_filter_gzip(id_archive[id]); // gzips incoming data
        archive_write_set_format_pax_restricted(id_archive[id]); // only use extensions when necessary
        archive_write_set_bytes_per_block(id_archive[id], 8192);
        archive_write_open_filename(id_archive[id], "test.tar.gz");

        for (auto const& value: filenames) {
            printf("- compressing: %s\n", value.c_str());
            stat(value.c_str(), &st);
            if(id_entry[id] == nullptr) {
                printf("- archive_entry_new() {}\n");
                id_entry[id] = archive_entry_new();
            }
            else {
                printf("- archive_entry_clear() {}\n");
                archive_entry_clear(id_entry[id]);
            }

            // the following attributes are mandatory
            archive_entry_set_pathname(id_entry[id], value.c_str());
            archive_entry_set_size(id_entry[id], st.st_size);
            archive_entry_set_filetype(id_entry[id], st.st_mode);

            archive_write_header(id_archive[id], id_entry[id]);

            fd = open(value.c_str(), O_RDONLY);
            int len = read(fd, buff_entry, sizeof(buff_entry));
            printf("- read() = %d\n", len);
            while ( len > 0 ) {
                archive_write_data(id_archive[id], buff_entry, len);
                len = read(fd, buff_entry, sizeof(buff_entry));
                printf("- read() = %d\n", len);
            }
            close(fd);
        }

        // done looping over files
        archive_entry_free(id_entry[id]);

        archive_write_close(id_archive[id]);
        archive_write_free(id_archive[id]);
    }

    return 0;

}
	// libstd
	#include <iostream>
	#include <vector>
	#include <string>
	#include <unistd.h>
	#include <fcntl.h>

	// libarchive
	#include <archive.h>
	#include <archive_entry.h>

	/// Client data passed to the libarchive read callback: a view of the
	/// archive bytes that main() has already loaded into memory.
	struct mydata {
	    char*      data;  ///< first byte of the buffered archive (not owned)
	    la_ssize_t size;  ///< number of valid bytes starting at `data`
	};

	/**
	 * libarchive read callback that serves the whole in-memory archive as a
	 * single block.
	 *
	 * The first call hands out the complete buffer described by the `mydata`
	 * client data. The buffer is then marked as consumed, so every later call
	 * returns 0, which is how a read callback signals end-of-stream.
	 *
	 * @param a            archive being read (unused)
	 * @param client_data  pointer to the `mydata` registered with archive_read_open()
	 * @param block        out: start of the bytes made available by this call
	 * @return number of bytes available at `*block`, 0 at end-of-stream, or the
	 *         negative value stored in `size` if the caller recorded a read error
	 */
	la_ssize_t libarchiveRead(struct archive* a, void* client_data, const void** block)
	{
	    (void)a; // not needed: all state lives in client_data

	    std::cout << "calling custom read().." << std::endl;
	    // client_data is a POINTER to mydata; the by-value cast here did not compile.
	    mydata *current_data = static_cast<mydata*>(client_data);

	    *block = current_data->data;
	    const la_ssize_t bytes_available = current_data->size;

	    // Everything has now been handed out; report EOF from the next call on.
	    current_data->size = 0;

	    return bytes_available;
	}

	int main(int argc, char** argv) {

	if(argc < 3)
	{
	std::cout << argv[0] << "{-r \| -w} file[s]" << std::endl;
	return 1;
	}

	std::vector<std::string> filenames;
	filenames.reserve(argc);

	while (*++argv != nullptr)
	{
	filenames.emplace_back(*argv);
	}

	bool modeRead = (filenames[0] == "-r");

	// archive related variables
	struct archive *archive;

	char buff_archive[1024 * 1024];

	// archive entry/file variables
	struct archive_entry *entry;
	struct stat st;

	char buff_entry[1024 * 1024];
	int len;
	int fd;

	if(modeRead)
	{
	std::cout << "Attempting to open " << filenames[1] << std::endl;

	fd = open(filenames[1].c_str(), O_RDONLY);
	len = read(fd, buff_archive, sizeof(buff_archive));

	archive = archive_read_new();
	archive_read_support_format_all(archive);
	archive_read_support_filter_all(archive);

	#ifdef FROM_STREAM

	mydata *client_data = new mydata();

	client_data->data = buff_archive;
	client_data->size = len;

	std::cout << "calling archive_read_open.." << std::endl;
	int res = archive_read_open(archive,
	client_data,
	nullptr,
	(archive_read_callback*)libarchiveRead,
	nullptr);

	#else
	std::cout << "calling archive_read_open_memory.." << std::endl;
	int res = archive_read_open_memory(archive, buff_archive, len);

	#endif

	int flags = 0;
	flags = ARCHIVE_EXTRACT_TIME;
	flags \|= ARCHIVE_EXTRACT_PERM;
	flags \|= ARCHIVE_EXTRACT_ACL;
	flags \|= ARCHIVE_EXTRACT_FFLAGS;

	res = archive_read_next_header(archive, &(entry));

	while(res == ARCHIVE_OK) {
	std::cout << "Extracting " << archive_entry_pathname(entry) << "..." << std::endl;
	// extract current entry
	archive_read_extract(archive, entry, flags);
	// read next if available
	res = archive_read_next_header(archive, &(entry));
	}

	archive_read_close(archive);
	archive_read_free(archive);
	close(fd);
	}
	else
	{
	archive = archive_write_new(); // initializes a new archive
	archive_write_add_filter_gzip(archive); // gzips incoming data
	archive_write_set_format_pax_restricted(archive); // only use extensions when necessary
	archive_write_set_bytes_per_block(archive, 8192);
	archive_write_open_filename(archive, "test.tar.gz");

	for (auto const& value: filenames){
	std::cout << "Compressing " << value << "...\n";
	stat(value.c_str(), &st);
	if(entry == nullptr){
	entry = archive_entry_new();
	}
	else
	{
	archive_entry_clear(entry);
	}

	// the following attributes are mandatory
	archive_entry_set_pathname(entry, value.c_str());
	archive_entry_set_size(entry, st.st_size);
	archive_entry_set_filetype(entry, st.st_mode);

	archive_write_header(archive, entry);

	fd = open(value.c_str(), O_RDONLY);
	len = read(fd, buff_entry, sizeof(buff_entry));
	while ( len > 0 ) {
	archive_write_data(archive, buff_entry, len);
	len = read(fd, buff_entry, sizeof(buff_entry));
	}
	close(fd);
	}

	// done looping over files
	archive_entry_free(entry);

	archive_write_close(archive);
	archive_write_free(archive);
	}

	return 0;

	}