@Mr0grog
Last active February 18, 2022
Test out the performance and memory usage of various ways to compose a gzip of streaming JSON log lines.
/**
* Test out the performance and memory usage of various ways to compose a gzip
* of streaming JSON log lines.
*
* One fun property of gzip is that a stream with multiple gzip chunks is
* itself valid as a single gzip stream/file. That means there are lots of
* interesting ways to break down how you build and stream out gzip data.
*/
const fs = require("fs");
const stream = require("stream");
const { pipeline } = require("stream/promises");
const zlib = require('zlib');
const { Buffer } = require('buffer');
const { promisify } = require('util');
const JSONStream = require("JSONStream");
const split2 = require("split2");
const streamChunker = require('stream-chunker');
// A reasonably large input file that matches the type of input we expect.
// ~3 GB of ND-JSON data. Most lines are pretty short (~150 bytes), but some
// are very long (~250 kB). All lines share the same structure, and lines of
// similar length tend to come together in chunks.
const inFilePath = "./data/univaf_raw/availability_log-2021-09-21.ndjson";
/**
* Simplest implementation: just throw Node's built-in gzip stream in a stream
* pipeline after we serialize the JSON.
*/
async function compressionSingleStream() {
await pipeline(
fs.createReadStream(inFilePath),
split2(),
async function* (source) {
for await (const line of source) {
if (line) {
yield JSON.parse(line);
}
}
},
JSONStream.stringify(false),
zlib.createGzip({ level: zlib.constants.Z_BEST_COMPRESSION }),
fs.createWriteStream(`${inFilePath}.basicStream.gz`)
);
}
/**
* Possibly over-engineered: create a stream that outputs multiple gzips, one
 * after the other (which together form a valid gzip). There's a lot of messy
 * work involved in making sure the whole thing keeps streaming without tying
 * up a huge chunk of memory for the batching.
*/
async function compressionBatchSubStream() {
class GzipBatchStream extends stream.Duplex {
constructor(options = {}) {
super({
readableObjectMode: false,
writableObjectMode: true
});
this.batchSize = options.batchSize || 10_000;
this.createNewZipper();
}
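// Start a fresh gzip stream for the current batch. Its output is forwarded
// to this stream's readable side, pausing for backpressure when push()
// reports that the consumer's buffer is full.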
createNewZipper() {
this.inputCount = 0;
this.currentZipStream = zlib.createGzip({ level: zlib.constants.Z_BEST_COMPRESSION });
this.currentZipStream.on("data", (chunk) => {
if (!this.push(chunk)) {
this.currentZipStream.pause();
}
})
}
_write(chunk, _encoding, callback) {
return this.currentZipStream.write(JSON.stringify(chunk) + "\n", (error) => {
if (error) {
return callback(error);
}
this.inputCount++;
if (this.inputCount === this.batchSize) {
// Don't call back that we're done until the current batch stream has been consumed!
this.currentZipStream.once("end", () => {
this.createNewZipper();
callback();
});
this.currentZipStream.end();
} else {
callback();
}
});
}
_final(callback) {
this.currentZipStream.once("end", () => {
this.currentZipStream = null;
callback();
});
this.currentZipStream.end();
}
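// The consumer wants more data; resume the gzip stream if it was paused
// for backpressure.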
_read(_size) {
if (this.currentZipStream) {
this.currentZipStream.resume();
}
}
}
await pipeline(
fs.createReadStream(inFilePath),
split2(),
async function* (source) {
for await (const line of source) {
if (line) {
yield JSON.parse(line);
}
}
},
new GzipBatchStream({ batchSize: 10_000 }),
fs.createWriteStream(`${inFilePath}.batchSubStream.gz`)
);
}
/**
* Simple batching: buffer up a large chunk of serialized output and then gzip
* the whole chunk and output that on the stream. Like
* `compressionBatchSubStream`, this winds up creating an output stream that is
* a bunch of gzips one after the other (which together also form a valid gzip.)
* This ought to take a lot more memory, and maybe also be slower. We'll see.
*/
async function compressionBatchChunks() {
await pipeline(
fs.createReadStream(inFilePath),
split2(),
async function* (source) {
for await (const line of source) {
if (line) {
yield JSON.parse(line);
}
}
},
async function* (source) {
const gzipPromise = promisify(zlib.gzip);
let batch = "";
let batchSize = 0;
for await (const row of source) {
batchSize++;
batch += JSON.stringify(row) + "\n";
if (batchSize === 10_000) {
yield await gzipPromise(batch, { level: zlib.constants.Z_BEST_COMPRESSION });
batch = "";
batchSize = 0;
}
}
if (batch.length) {
yield await gzipPromise(batch, { level: zlib.constants.Z_BEST_COMPRESSION });
batch = "";
batchSize = 0;
}
},
fs.createWriteStream(`${inFilePath}.batchChunkStream.gz`)
);
}
/**
* Like `compressionBatchChunks`, but separates the batching from the gzipping.
 * One stream outputs batches of `batchSize` lines (10,000 by default), and its
 * output is consumed by a normal gzip stream, rather than the single-shot
 * `gzip()` call for each batch.
*/
async function compressionBatchChunks2(batchSize) {
await pipeline(
fs.createReadStream(inFilePath),
split2(),
async function* (source) {
for await (const line of source) {
if (line) {
yield JSON.parse(line);
}
}
},
async function* (source) {
for await (const row of source) {
yield JSON.stringify(row) + "\n";
}
},
async function* (source) {
let buffer = "";
let bufferSize = 0;
for await (const row of source) {
bufferSize++;
buffer += row;
if (bufferSize === batchSize) {
yield buffer;
buffer = "";
bufferSize = 0;
}
}
// Emit any leftover partial batch.
if (bufferSize > 0) {
yield buffer;
}
},
zlib.createGzip({ level: zlib.constants.Z_BEST_COMPRESSION }),
fs.createWriteStream(`${inFilePath}.batchChunkStream2.gz`)
);
}
/**
* Like `compressionBatchChunks2`, but batches by bytes instead of by line.
*/
async function compressionBatchChunks2Bytes(batchSize) {
await pipeline(
fs.createReadStream(inFilePath),
split2(),
async function* (source) {
for await (const line of source) {
if (line) {
yield JSON.parse(line);
}
}
},
async function* (source) {
for await (const row of source) {
yield Buffer.from(JSON.stringify(row) + "\n", "utf8");
}
},
async function* (source) {
let buffer = Buffer.allocUnsafe(batchSize);
let bufferPosition = 0;
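// Copy serialized rows into a fixed-size buffer, yielding it downstream
// each time it fills and allocating a fresh buffer for the next batch.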
for await (const input of source) {
let inputPosition = 0;
while (inputPosition < input.length) {
const written = input.copy(buffer, bufferPosition, inputPosition);
inputPosition += written;
bufferPosition += written;
if (bufferPosition === batchSize) {
yield buffer;
// allocUnsafe is safe here: the buffer is always fully written before use.
buffer = Buffer.allocUnsafe(batchSize);
bufferPosition = 0;
}
}
}
// Emit any leftovers.
if (bufferPosition > 0) {
yield buffer.slice(0, bufferPosition);
}
},
zlib.createGzip({ level: zlib.constants.Z_BEST_COMPRESSION }),
fs.createWriteStream(`${inFilePath}.batchChunkStream2Bytes.gz`)
);
}
/**
* Like `compressionBatchChunks2Bytes`, but with proper streams instead of
* async generators.
*/
async function compressionBatchChunks2BytesProper({ batchSize, setHighWaterMark = false, maxMemLevel = false, setChunkSize = false, setGzipHighWaterMark = false }) {
// Couldn't find a good version of this on NPM (seems surprising, I'm probably
// missing it). But the `stream-chunker` package performs *terribly* (it's
// worse than no chunking at all!)
class BufferedStream extends stream.Transform {
constructor ({ size = 256 * 1024, setHighWaterMark = false } = {}) {
const options = {};
if (setHighWaterMark) options.readableHighWaterMark = size;
super(options);
this.size = size;
this.resetBuffer();
}
resetBuffer () {
this.buffer = Buffer.allocUnsafe(this.size);
this.offset = 0;
}
_transform (input, encoding, callback) {
if (typeof input === "string") {
input = Buffer.from(input, encoding);
} else if (!(input instanceof Buffer)) {
callback(new TypeError(`BufferedStream input must be strings or buffers, not ${input.constructor.name}`));
return;
}
let inputPosition = 0;
while (inputPosition < input.length) {
const written = input.copy(this.buffer, this.offset, inputPosition);
inputPosition += written;
this.offset += written;
if (this.offset === this.size) {
this.push(this.buffer);
this.resetBuffer();
}
}
callback();
}
_flush (callback) {
if (this.offset > 0) {
this.push(this.buffer.slice(0, this.offset));
}
callback();
}
_destroy (error, callback) {
this.buffer = null;
callback(error);
}
}
await pipeline(
fs.createReadStream(inFilePath),
split2(),
async function* (source) {
for await (const line of source) {
if (line) {
yield JSON.parse(line);
}
}
},
JSONStream.stringify(false),
new BufferedStream({ size: batchSize, setHighWaterMark }),
zlib.createGzip({
level: zlib.constants.Z_BEST_COMPRESSION,
memLevel: maxMemLevel ? zlib.constants.Z_MAX_MEMLEVEL : zlib.constants.Z_DEFAULT_MEMLEVEL,
chunkSize: setChunkSize ? batchSize : undefined,
highWaterMark: setGzipHighWaterMark ? batchSize : undefined
}),
fs.createWriteStream(`${inFilePath}.batchChunkStream2BytesProper.gz`)
);
}
/**
* Like `compressionBatchChunks2Bytes`, but with a third-party component
* (stream-chunker).
*/
async function compressionBatchChunks2Bytes3p(batchSize) {
await pipeline(
fs.createReadStream(inFilePath),
split2(),
async function* (source) {
for await (const line of source) {
if (line) {
yield JSON.parse(line);
}
}
},
JSONStream.stringify(false),
streamChunker(batchSize, { flush: true, align: false }),
zlib.createGzip({ level: zlib.constants.Z_BEST_COMPRESSION }),
fs.createWriteStream(`${inFilePath}.batchChunkStream2Bytes3p.gz`)
);
}
async function main() {
const sizeArgument = process.argv.find(x => x.startsWith("--size="));
const batchSize = sizeArgument && parseInt(sizeArgument.match(/=(.*)$/)?.[1], 10) || 10_000;
console.log("Batch size:", batchSize);
const maxMemLevel = process.argv.includes('--max-mem-level');
console.log("maxMemLevel:", maxMemLevel);
const setHighWaterMark = process.argv.includes('--set-high-water-mark');
console.log("setHighWaterMark:", setHighWaterMark);
const setChunkSize = process.argv.includes('--set-chunk-size');
console.log("setChunkSize:", setChunkSize);
const setGzipHighWaterMark = process.argv.includes('--set-gzip-high-water-mark');
console.log("setGzipHighWaterMark:", setGzipHighWaterMark);
// Print memory usage every few seconds. This is optional so we can try a few
// runs without it and make sure it's not impacting timing.
if (process.argv.includes("--show-memory")) {
const formatter = new Intl.NumberFormat();
console.log("RSS\tHeap Total\tHeap Used\tExternal\tArrayBuffers")
setInterval(() => {
const usage = process.memoryUsage();
console.log([
formatter.format(usage.rss).padStart(11, " "),
formatter.format(usage.heapTotal).padStart(11, " "),
formatter.format(usage.heapUsed).padStart(11, " "),
formatter.format(usage.external).padStart(11, " "),
formatter.format(usage.arrayBuffers).padStart(11, " "),
].join("\t"));
}, 5_000).unref();
}
if (process.argv.includes('single-stream')) {
await compressionSingleStream();
}
if (process.argv.includes('batch-sub-stream')) {
await compressionBatchSubStream();
}
if (process.argv.includes('batch-chunk-stream')) {
await compressionBatchChunks();
}
if (process.argv.includes('batch-chunk-stream-2')) {
await compressionBatchChunks2(batchSize);
}
if (process.argv.includes('batch-chunk-stream-2-bytes')) {
await compressionBatchChunks2Bytes(batchSize);
}
if (process.argv.includes('batch-chunk-stream-2-bytes-proper')) {
await compressionBatchChunks2BytesProper({
batchSize,
maxMemLevel,
setHighWaterMark,
setChunkSize,
setGzipHighWaterMark
});
}
if (process.argv.includes('batch-chunk-stream-2-bytes-3p')) {
await compressionBatchChunks2Bytes3p(batchSize);
}
}
main().catch(error => {
console.error(error);
process.exitCode = 1;
});
Mr0grog commented Feb 8, 2022

Kind of fascinating and somewhat unexpected results here. On a t3.xlarge AWS EC2 instance:

  • compressionSingleStream

    | Memory Type | Average (bytes) | Min | Max |
    | --- | ---: | ---: | ---: |
    | RSS | 89,488,346 | 85,209,088 | 92,827,648 |
    | Heap Total | 42,295,599 | 39,436,288 | 45,789,184 |
    | Heap Used | 13,378,264 | 5,101,816 | 22,834,776 |
    | External | 18,290,079 | 521,917 | 146,535,307 |
    | Array Buffers | 1,149,985 | 215,237 | 4,735,054 |

    Rough timing over a few runs:
    Real Time: 9m5s
    User Time: 7m5s
    System Time: 4m30s

  • compressionBatchSubStream

    | Memory Type | Average (bytes) | Min | Max |
    | --- | ---: | ---: | ---: |
    | RSS | 94,440,617 | 91,013,120 | 98,271,232 |
    | Heap Total | 42,842,023 | 41,533,440 | 45,961,216 |
    | Heap Used | 12,364,521 | 4,519,680 | 20,152,344 |
    | External | 8,797,408 | 3,058,382 | 26,934,524 |
    | Array Buffers | 2,330,231 | 116,933 | 5,006,065 |

    Rough timing over a few runs:
    Real Time: 9m17s
    User Time: 7m30s
    System Time: 4m22s

  • compressionBatchChunks

    | Memory Type | Average (bytes) | Min | Max |
    | --- | ---: | ---: | ---: |
    | RSS | 115,447,515 | 105,644,032 | 131,264,512 |
    | Heap Total | 51,319,515 | 42,110,976 | 70,037,504 |
    | Heap Used | 23,190,954 | 7,004,656 | 48,968,584 |
    | External | 40,858,885 | 4,383,008 | 130,859,754 |
    | Array Buffers | 5,029,403 | 1,250,118 | 11,330,826 |

    Rough timing over a few runs:
    Real Time: 2m23s
    User Time: 2m23s
    System Time: 0m6s

Unsurprisingly, compressionBatchChunks takes more memory (29%–41% more than compressionSingleStream). This will probably depend a lot on the particular data on the stream, especially since this version chunks by line rather than by byte. That also matches the fact that its memory usage swung around within a 26 MB range, while compressionSingleStream stayed nice and steady within a 7 MB range.

On the other hand, the speed of compressionBatchChunks was a big surprise. There’s clearly a lot of overhead involved in streaming small chunks of data into zlib, and buffering up a big chunk for it to chew on gives an absurdly large speed boost.

Last time I worked with gzip in Node (long ago in the v0.x days), there were issues with actually streaming, and some manual chunking was needed. It’s clear that’s no longer the case, and all the fancy footwork I did to do it by hand was entirely pointless and performed about the same or slightly worse than just piping things through a built-in gzip stream. Yay!


Tested that the output is valid for all of these with:

```sh
# Check the first few lines
$ gzip --decompress --keep --stdout data/univaf_raw/availability_log-2021-09-21.ndjson.basicStream.gz | head -n 10

# Check the last few lines
$ gzip --decompress --keep --stdout data/univaf_raw/availability_log-2021-09-21.ndjson.basicStream.gz | tail -n 10

# Check lines near the 10,000 line boundary between chunks
$ gzip --decompress --keep --stdout data/univaf_raw/availability_log-2021-09-21.ndjson.basicStream.gz | head -n 10005 | tail -n 10
```
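Node can verify the same property directly: its gunzip implementation decodes a stream of concatenated gzip members (this has been true for a long time now). A quick sketch, with made-up example lines:

```js
const zlib = require("zlib");

// Two complete, independent gzips...
const part1 = zlib.gzipSync('{"line":1}\n');
const part2 = zlib.gzipSync('{"line":2}\n');

// ...concatenated back to back still decompress as a single stream.
const combined = Buffer.concat([part1, part2]);
console.log(zlib.gunzipSync(combined).toString("utf8"));
// Prints:
// {"line":1}
// {"line":2}
```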

Mr0grog commented Feb 8, 2022

Given my surprise at the performance of compressionBatchChunks, I added a compressionBatchChunks2 routine that separates the batching and gzipping, so we have a stream that outputs large strings as batches and then a normal gzip stream that consumes it (instead of gzipping each whole chunk in one call and outputting that).

  • compressionBatchChunks2

    | Memory Type | Average (bytes) | Min | Max |
    | --- | ---: | ---: | ---: |
    | RSS | 112,496,091 | 98,324,480 | 148,488,192 |
    | Heap Total | 54,839,696 | 42,377,216 | 87,490,560 |
    | Heap Used | 26,598,015 | 8,233,048 | 63,777,840 |
    | External | 87,834,419 | 2,938,557 | 392,328,303 |
    | Array Buffers | 3,616,445 | 223,429 | 14,988,181 |

    Rough timing over a few runs:
    Real Time: 2m20s
    User Time: 2m20s
    System Time: 0m5s

It’s not directly comparable since I shut down and restarted the EC2 instance in between, but at least a rough comparison is reasonable. It performs pretty similarly, both in terms of memory and time (higher peak memory usage, but the averages are quite close). It’s conceptually simpler to engineer, so it seems like a small but universal improvement over compressionBatchChunks. Not monumentally different in any way, though.

Mr0grog commented Feb 18, 2022

I came back again for one more round of testing on this.

Since compressionBatchChunks2 taught me that we really don’t need to break the output into multiple gzips to get low-memory streaming and compression, it stands to reason that memory usage could be optimized further by batching by bytes instead of by number of lines. The added tests here look at that, and (unsurprisingly) it improves memory usage in general and also makes the range between minimum and maximum memory usage much smaller, so the memory footprint stays generally consistent.

In the new code, I’ve added 3 methods:

  • compressionBatchChunks2Bytes works similarly to compressionBatchChunks2, except it creates buffers of N bytes instead of strings of N lines.

  • compressionBatchChunks2BytesProper is the same as the above, but the batching code is written as a transform stream instead of as an async generator. I was curious whether the async generators supported by stream.pipeline() imposed any extra overhead (or maybe even had less overhead) compared to normal streams, but writing the code as a stream also allowed me to test some optimizations around high water marks (which you can’t set for the generator). Those optimizations are:

    • Setting highWaterMark on the batching stream to match the size of the batches. This had no notable impact. (On the other hand, I can imagine how this might make a difference if data is coming into the batching stream more slowly.)
    • Setting memLevel on the gzip stream to the maximum value (that is, 9 instead of 8). This maybe has some minor speed improvements when the batch size is 64 kB – 128 kB, but it seems just as likely that this is an artifact of the particular data file I was testing with. (I don’t think it’s a matter of too few runs to average out random jitter, since the same pattern repeats in the next test.)
    • Setting highWaterMark on the batching stream and memLevel on the gzip stream. This performs basically the same as just using memLevel.
    • Setting chunkSize to match the batch size (the default is 16 kB) in addition to the above optimizations. This had a small but consistent speed improvement for batch sizes above 64 kB. In reality, I think what’s happening here is that the highWaterMark for filesystem streams is 64 kB, so this is really more about how few reads are needed to fill the next stream’s buffer, rather than anything about gzipping. In any case, this does mean that optimizing the gzip stream for the high water mark of the next stream can have a pretty big impact on overall performance, which is good to keep in mind (see the sketch after this list).
    • Setting highWaterMark on the gzip stream in addition to the previous optimization. This had no notable impact.

    It’s worth noting that none of the above optimizations made a consistent or notable impact on memory usage.

    One other minor advantage here is that the async generators don’t work with the old-style stream created by JSONStream, while an actual stream object does. This isn’t a huge deal, though.

  • compressionBatchChunks2Bytes3p uses the stream-chunker package to batch up the data instead of custom code. It turns out to be incredibly inefficient, and actually makes the whole pipeline perform worse than anything else.
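For reference, the option combinations above boil down to a gzip stream like this; a condensed sketch of what compressionBatchChunks2BytesProper constructs with every flag turned on (batchSize stands in for whatever --size was set to):

```js
const zlib = require("zlib");

const batchSize = 256 * 1024; // illustrative; the tests swept 1 kB – 384 kB

const gzip = zlib.createGzip({
  level: zlib.constants.Z_BEST_COMPRESSION,
  // --max-mem-level: 9 instead of the default 8.
  memLevel: zlib.constants.Z_MAX_MEMLEVEL,
  // --set-chunk-size: emit output chunks of batchSize instead of 16 kB.
  chunkSize: batchSize,
  // --set-gzip-high-water-mark: readable-side buffer matched to the batch.
  highWaterMark: batchSize,
});
```

(--set-high-water-mark applies to the BufferedStream in front of this, not to the gzip stream itself.)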

Overall, these approaches (excepting compressionBatchChunks2Bytes3p, which was just bad all around) improved both memory usage and time. I didn’t have a whole lot of time to dig into this, but I’m guessing most of the speed improvement between buffering by lines and buffering by bytes comes down to doing more string operations in JS, which would generally be a bit more expensive than operating on bytes in buffers. It could also be that I made a poor estimate of average bytes per line and the comparison isn’t as apples-to-apples as I’d hoped. In any case, the memory improvements between them are more clear-cut.

One interesting thing here is that user time tends to be pretty consistent between the different optimizations, so what the optimizations are affecting is likely to be mainly about a) how well zlib is able to spread its work across cores without bottlenecking and b) how much time is spent shuttling memory/data around between JS and zlib.

In most cases, performance is pretty consistent with chunk sizes above 32 kB, and progressively slows down as the chunks get smaller. This makes sense, since the default windowBits for zlib is 15, which equates to a 32 kB data window to work with (note: the default is also the max; it can’t be made bigger). As soon as the batches get smaller than that, we start wasting time waiting for data to fill zlib’s internal working buffer, and things really slow to a crawl.

Finally, memory usage between all the different byte buffering approaches here was pretty consistent, so I only included numbers from one of the methods in the tables below.

Overall Lessons Here

  • Gzip streams can work over large streams of data pretty efficiently and really don’t need much memory at all. You don’t need to manually break up your data into multiple gzip blocks to get efficient streaming output. (I’m not sure if this is due to an implementation change from the early days, if I was just remembering something incorrectly, or if I’d never rigorously tested what I’d read about this way back early on. 🤷 )

  • Batching data before it arrives at a gzip stream can massively improve both gzip speed and memory usage. Unless the timing of each byte on your stream is very inconsistent and very slow, you should probably always have a stream batch up data before piping/writing it to a gzip stream.

  • Batches should ideally be >= the window size you are using in zlib (by default, this is 32 kB). You can calculate this as batchSizeBytes = 1 << windowBits. (See the sketch after this list.)

  • Setting zlib’s memLevel to the max did not make an appreciable difference on the data set I was working with.

  • Setting the high water mark on a zlib stream doesn’t make any noticeable difference.

  • When possible, matching a zlib stream’s chunkSize to the highWaterMark of whatever stream is reading from it can give a small but consistent speed boost.
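Putting those lessons together, here’s a minimal sketch of the shape they suggest. It assumes the BufferedStream transform from the gist code above (or any equivalent batching stream); the file paths are placeholders:

```js
const fs = require("fs");
const zlib = require("zlib");
const { pipeline } = require("stream/promises");

async function compressFile(inPath, outPath) {
  // zlib's working window is (1 << windowBits) bytes. 15 is both the
  // default and the max, so batches should be at least 32 kB.
  const windowBits = 15;
  const batchSize = 1 << windowBits;

  const output = fs.createWriteStream(outPath);
  await pipeline(
    fs.createReadStream(inPath),
    // Batch the data up before it ever reaches the gzip stream.
    new BufferedStream({ size: batchSize }),
    zlib.createGzip({
      level: zlib.constants.Z_BEST_COMPRESSION,
      // Match output chunks to the next stream's buffer (64 kB for file
      // write streams) so fewer reads are needed to fill it.
      chunkSize: output.writableHighWaterMark,
    }),
    output
  );
}
```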

Some Tables with Performance Measurements

All data comes from samples across 4 runs of each method/optimization at each batch size. The timings in the table below are averages across all four runs.

Each cell shows real / user / system time. Column abbreviations: HWM = highWaterMark on the batching stream, Mem = max memLevel on the gzip stream, Chunk = gzip chunkSize matched to the batch size, GzHWM = highWaterMark on the gzip stream.

| Batch Size | Batched Lines | Batched Bytes | Bytes + HWM | Bytes + Mem | Bytes + HWM + Mem | Bytes + HWM + Mem + Chunk | Bytes + HWM + Mem + Chunk + GzHWM |
| --- | --- | --- | --- | --- | --- | --- | --- |
| 384 kB | 2m 20s 526ms / 2m 24s 22ms / 4s 866ms | 1m 44s 560ms / 2m 29s 972ms / 4s 485ms | 1m 43s 448ms / 2m 28s 370ms / 4s 438ms | 1m 42s 825ms / 2m 31s 257ms / 4s 340ms | 1m 42s 180ms / 2m 30s 183ms / 4s 237ms | 1m 23s 670ms / 2m 29s 53ms / 3s 387ms | 1m 23s 985ms / 2m 29s 760ms / 3s 197ms |
| 256 kB | 2m 21s 242ms / 2m 24s 882ms / 5s 72ms | 1m 43s 938ms / 2m 29s 910ms / 4s 455ms | 1m 43s 642ms / 2m 29s 520ms / 4s 480ms | 1m 45s 635ms / 2m 30s 167ms / 4s 598ms | 1m 45s 498ms / 2m 30s 278ms / 4s 242ms | 1m 26s 427ms / 2m 31s 428ms / 3s 310ms | 1m 25s 787ms / 2m 30s 268ms / 3s 188ms |
| 128 kB | 2m 23s 510ms / 2m 27s 320ms / 5s 840ms | 1m 48s 245ms / 2m 30s 265ms / 4s 912ms | 1m 46s 820ms / 2m 28s 357ms / 4s 603ms | 1m 38s 127ms / 2m 28s 260ms / 4s 482ms | 1m 38s 353ms / 2m 28s 507ms / 4s 465ms | 1m 29s 922ms / 2m 30s 255ms / 3s 575ms | 1m 29s 362ms / 2m 29s 590ms / 3s 485ms |
| 64 kB | 2m 20s 620ms / 2m 24s 998ms / 6s 408ms | 1m 44s 953ms / 2m 34s 810ms / 6s 423ms | 1m 40s 715ms / 2m 28s 390ms / 4s 978ms | 1m 31s 557ms / 2m 25s 590ms / 4s 487ms | 1m 32s 445ms / 2m 26s 870ms / 4s 713ms | 1m 31s 490ms / 2m 26s 605ms / 3s 975ms | 1m 31s 662ms / 2m 27s 198ms / 3s 925ms |
| 32 kB | 2m 23s 938ms / 2m 27s 446ms / 7s 226ms | 1m 39s 337ms / 2m 33s 993ms / 6s 737ms | 1m 34s 802ms / 2m 28s 97ms / 5s 648ms | 1m 32s 365ms / 2m 29s 35ms / 5s 480ms | 1m 32s 770ms / 2m 29s 490ms / 5s 848ms | 1m 29s 265ms / 2m 23s 547ms / 5s 112ms | 1m 29s 90ms / 2m 23s 748ms / 4s 875ms |
| 16 kB | 2m 28s 364ms / 2m 30s 806ms / 8s 576ms | 1m 54s 538ms / 2m 41s 300ms / 8s 488ms | 1m 48s 490ms / 2m 34s 162ms / 7s 725ms | 1m 46s 892ms / 2m 33s 405ms / 7s 158ms | 1m 46s 705ms / 2m 33s 268ms / 7s 178ms | 1m 44s 40ms / 2m 29s 938ms / 7s 32ms | 1m 42s 873ms / 2m 28s 690ms / 6s 702ms |
| 8 kB | 2m 17s 626ms / 2m 47s 722ms / 10s 930ms | 2m 16s 752ms / 2m 44s 192ms / 10s 720ms | 2m 16s 37ms / 2m 43s 570ms / 10s 705ms | 2m 11s 238ms / 2m 38s 775ms / 9s 830ms | 2m 15s 420ms / 2m 43s 232ms / 10s 477ms | 2m 10s 97ms / 2m 37s 523ms / 10s 43ms | 2m 5s 370ms / 2m 35s 235ms / 9s 987ms |
| 4 kB | 2m 45s 2ms / 3m 1s 602ms / 17s 54ms | 2m 36s 107ms / 2m 50s 768ms / 15s 790ms | 2m 38s 463ms / 2m 53s 37ms / 16s 12ms | 2m 38s 15ms / 2m 52s 218ms / 15s 825ms | 2m 39s 782ms / 2m 53s 953ms / 16s 157ms | 2m 30s 865ms / 2m 44s 75ms / 16s 655ms | 2m 26s 620ms / 2m 41s 398ms / 16s 450ms |
| 2 kB | – | 3m 5s 350ms / 3m 10s 32ms / 27s 670ms | 3m 3s 373ms / 3m 7s 813ms / 27s 480ms | 3m 5s 2ms / 3m 9s 303ms / 27s 490ms | 3m 7s 102ms / 3m 10s 857ms / 27s 960ms | 2m 59s 490ms / 3m 1s 750ms / 31s 335ms | 2m 55s 855ms / 2m 59s 183ms / 30s 680ms |
| 1 kB | 4m 7s 480ms / 3m 57s 560ms / 55s 32ms | 3m 57s 37ms / 3m 46s 927ms / 54s 477ms | 3m 57s 162ms / 3m 47s 25ms / 54s 760ms | 3m 53s 578ms / 3m 44s 618ms / 52s 545ms | 3m 54s 555ms / 3m 44s 322ms / 52s 935ms | 3m 47s 550ms / 3m 35s 745ms / 1m 0s 982ms | 3m 46s 415ms / 3m 34s 993ms / 1m 0s 417ms |

This is the memory usage from all 4 runs of compressionBatchChunks2BytesProper with all the optimizations on. It’s not appreciably different from any other combination of optimizations or from compressionBatchChunks2Bytes.

| Buffer Size | Memory Type | Average (bytes) | Min | Max |
| --- | --- | ---: | ---: | ---: |
| 384 kB | RSS | 104,030,272 | 97,927,168 | 118,628,352 |
| | Heap Total | 40,648,576 | 39,706,624 | 43,089,920 |
| | Heap Used | 12,176,966 | 4,901,440 | 20,850,632 |
| | External | 57,372,070 | 9,660,953 | 213,398,241 |
| | ArrayBuffers | 8,165,564 | 2,582,725 | 21,719,237 |
| 256 kB | RSS | 100,179,486 | 96,157,696 | 106,569,728 |
| | Heap Total | 40,695,748 | 39,444,480 | 42,987,520 |
| | Heap Used | 11,943,601 | 5,173,792 | 20,279,872 |
| | External | 43,228,420 | 3,893,785 | 244,273,467 |
| | ArrayBuffers | 8,046,504 | 2,607,301 | 20,039,877 |
| 128 kB | RSS | 94,362,564 | 91,574,272 | 98,009,088 |
| | Heap Total | 41,277,801 | 39,444,480 | 44,130,304 |
| | Heap Used | 12,596,829 | 5,155,880 | 20,859,848 |
| | External | 38,256,011 | 4,393,497 | 128,831,350 |
| | ArrayBuffers | 3,938,731 | 1,468,613 | 8,360,145 |
| 64 kB | RSS | 90,714,804 | 83,144,704 | 94,695,424 |
| | Heap Total | 41,769,508 | 39,706,624 | 45,133,824 |
| | Heap Used | 13,905,169 | 5,619,832 | 23,282,328 |
| | External | 25,749,151 | 2,558,489 | 99,488,492 |
| | ArrayBuffers | 3,069,412 | 846,021 | 8,441,476 |
| 32 kB | RSS | 91,643,362 | 87,035,904 | 96,342,016 |
| | Heap Total | 42,865,784 | 39,968,768 | 50,642,944 |
| | Heap Used | 13,651,218 | 7,118,320 | 21,181,032 |
| | External | 15,561,678 | 1,788,441 | 41,385,533 |
| | ArrayBuffers | 2,523,312 | 714,949 | 6,515,831 |
| 16 kB | RSS | 92,032,307 | 88,014,848 | 98,107,392 |
| | Heap Total | 43,989,299 | 40,230,912 | 52,494,336 |
| | Heap Used | 15,607,441 | 6,186,752 | 27,250,312 |
| | External | 7,777,347 | 1,086,626 | 64,537,265 |
| | ArrayBuffers | 2,530,505 | 714,949 | 6,241,937 |
| 8 kB | RSS | 92,965,282 | 87,474,176 | 100,810,752 |
| | Heap Total | 44,862,025 | 40,493,056 | 51,908,608 |
| | Heap Used | 16,094,696 | 6,113,592 | 26,453,416 |
| | External | 5,992,805 | 936,473 | 35,004,788 |
| | ArrayBuffers | 2,063,636 | 411,845 | 6,147,329 |
| 4 kB | RSS | 92,521,303 | 86,949,888 | 100,409,344 |
| | Heap Total | 44,862,455 | 40,230,912 | 55,087,104 |
| | Heap Used | 16,232,215 | 7,344,272 | 27,337,672 |
| | External | 5,466,803 | 1,227,289 | 37,395,032 |
| | ArrayBuffers | 2,083,278 | 403,653 | 5,817,932 |
| 2 kB | RSS | 91,437,513 | 86,269,952 | 98,705,408 |
| | Heap Total | 43,407,227 | 40,755,200 | 50,028,544 |
| | Heap Used | 14,673,008 | 4,878,200 | 27,261,088 |
| | External | 8,205,030 | 576,025 | 27,599,519 |
| | ArrayBuffers | 1,943,753 | 174,277 | 7,317,701 |
| 1 kB | RSS | 90,928,378 | 85,712,896 | 95,481,856 |
| | Heap Total | 43,149,016 | 40,230,912 | 48,041,984 |
| | Heap Used | 13,615,629 | 5,325,600 | 23,040,816 |
| | External | 12,124,117 | 576,025 | 64,054,792 |
| | ArrayBuffers | 1,906,545 | 248,005 | 5,843,141 |
