Skip to content

Instantly share code, notes, and snippets.

@mmomtchev
Last active April 7, 2021 09:32
Show Gist options
  • Save mmomtchev/14c3428255fd9c88ed20517572d8efdc to your computer and use it in GitHub Desktop.
Save mmomtchev/14c3428255fd9c88ed20517572d8efdc to your computer and use it in GitHub Desktop.
gdal.LayerFeatures.get vs gdal.LayerFeatures.getAsync
// Benchmark: open admin.geojson synchronously, then read every feature of
// layer 0 with the synchronous features.get() and report the mean duration
// of a single call as recorded through perf_hooks marks/measures.
const gdal = require('../node-gdal-async');
const perf = require('perf_hooks').performance;
const PerformanceObserver = require('perf_hooks').PerformanceObserver;

// Baseline sample; passed back to eventLoopUtilization() below to obtain
// the utilization over the lifetime of the benchmark.
const elu = perf.eventLoopUtilization();

console.time('open');
const dataset = gdal.open('admin.geojson');
console.timeEnd('open');

console.time('count');
const len = dataset.layers.get(0).features.count();
console.timeEnd('count');

// Totals accumulated from every 'measure' entry the observer receives.
let events = 0;
let duration = 0;
const perfObserver = new PerformanceObserver((items) => {
  items.getEntries().forEach((entry) => {
    duration += entry.duration;
    events++;
  });
});
// NOTE(review): `buffer` is not a documented observe() option (the documented
// one is `buffered`); it is kept unchanged here — confirm whether it is needed.
perfObserver.observe({ entryTypes: ['measure'], buffer: true });

console.time('get');
for (let i = 0; i < len; i++) {
  perf.mark(`get-start-${i}`);
  const f = dataset.layers.get(0).features.get(i);
  perf.mark(`get-end-${i}`);
  perf.measure(`get-${i}`, `get-start-${i}`, `get-end-${i}`);
  // Break the optimizer: use the result so the call cannot be elided.
  if (f.geometry === 3) {
    console.log(3);
  }
}
console.timeEnd('get');

// Checked synchronously on purpose: at this point a fully synchronous script
// has not entered the event loop yet.
if (perf.nodeTiming.loopStart >= 0) {
  console.log('eventLoopUtilization', perf.eventLoopUtilization(elu).utilization);
} else {
  console.log('eventLoop didn\'t start (100% sync code)');
}

// The observer callback may be delivered asynchronously (after this script's
// synchronous code has finished), in which case `events` and `duration` are
// still 0 here. Print the summary at exit, once all entries were delivered.
process.on('exit', () => {
  // Avoid printing "NaNms" when no measure entry was observed (empty layer).
  const perGet = events > 0 ? `${(duration / events).toFixed(4)}ms` : 'n/a';
  console.log('gets', events, 'per-get', perGet);
});
// Benchmark: open admin.geojson once per CPU reported by os.cpus() and read
// the features of layer 0 with features.getAsync(), dataset `t` handling the
// features whose index is congruent to `t` modulo the dataset count.
const gdal = require('../node-gdal-async');
const perf = require('perf_hooks').performance;
const PerformanceObserver = require('perf_hooks').PerformanceObserver;

// Totals accumulated from every 'measure' entry the observer receives.
let events = 0;
let duration = 0;
const threads = require('os').cpus().length;

// Baseline sample; passed back to eventLoopUtilization() in the exit report.
const elu = perf.eventLoopUtilization();

const perfObserver = new PerformanceObserver((items) => {
  items.getEntries().forEach((entry) => {
    duration += entry.duration;
    events++;
  });
});
// NOTE(review): `buffer` is not a documented observe() option (the documented
// one is `buffered`); it is kept unchanged here — confirm whether it is needed.
perfObserver.observe({ entryTypes: ['measure'], buffer: true });

for (let t = 0; t < threads; t++) {
  console.time(`open-${t}`);
  gdal.openAsync('admin.geojson', (openError, dataset) => {
    // Surface the real failure instead of a TypeError on `dataset` below.
    if (openError) {
      throw openError;
    }
    dataset.id = t;
    console.timeEnd(`open-${t}`);

    console.time(`count-${t}`);
    const len = dataset.layers.get(0).features.count();
    console.timeEnd(`count-${t}`);

    console.time(`get-${t}`);
    for (let i = t; i < len; i += threads) {
      // The start mark is taken when the request is issued, so the measured
      // duration runs from issuing the call to the invocation of its callback.
      perf.mark(`get-start-${t}-${i}`);
      dataset.layers.get(0).features.getAsync(i, (getError, f) => {
        if (getError) {
          throw getError;
        }
        perf.mark(`get-end-${t}-${i}`);
        perf.measure(`get-${t}-${i}`, `get-start-${t}-${i}`, `get-end-${t}-${i}`);
        // Use the result so the call cannot be optimized away.
        if (f.geometry === 3) {
          console.log(3);
        }
      });
    }
  });
}

// Report once the process is about to exit, i.e. after every callback ran.
process.on('exit', () => {
  for (let t = 0; t < threads; t++) {
    console.timeEnd(`get-${t}`);
  }
  if (perf.nodeTiming.loopStart >= 0) {
    console.log('eventLoopUtilization', perf.eventLoopUtilization(elu).utilization);
  } else {
    console.log('eventLoop didn\'t start (100% sync code)');
  }
  // Avoid printing "NaNms" when no measure entry was observed.
  const perGet = events > 0 ? `${(duration / events).toFixed(4)}ms` : 'n/a';
  console.log('gets', events, 'per-get', perGet);
});
// Benchmark: open admin.geojson asynchronously once, then request every
// feature of layer 0 with features.getAsync() on that single dataset.
// NOTE(review): `on` is not used anywhere in this script.
const { on } = require('process');
const gdal = require('../node-gdal-async');
const perf = require('perf_hooks').performance;
const PerformanceObserver = require('perf_hooks').PerformanceObserver;

// Totals accumulated from every 'measure' entry the observer receives.
let events = 0;
let duration = 0;

// Baseline sample; passed back to eventLoopUtilization() in the exit report.
const elu = perf.eventLoopUtilization();

console.time('open');
gdal.openAsync('admin.geojson', (openError, dataset) => {
  // Surface the real failure instead of a TypeError on `dataset` below.
  if (openError) {
    throw openError;
  }
  console.timeEnd('open');

  console.time('count');
  const len = dataset.layers.get(0).features.count();
  console.timeEnd('count');

  const perfObserver = new PerformanceObserver((items) => {
    items.getEntries().forEach((entry) => {
      duration += entry.duration;
      events++;
    });
  });
  // NOTE(review): `buffer` is not a documented observe() option (the
  // documented one is `buffered`); kept unchanged — confirm whether needed.
  perfObserver.observe({ entryTypes: ['measure'], buffer: true });

  console.time('get');
  for (let i = 0; i < len; i++) {
    // The start mark is taken when the request is issued, so the measured
    // duration runs from issuing the call to the invocation of its callback.
    perf.mark(`get-start-${i}`);
    dataset.layers.get(0).features.getAsync(i, (getError, f) => {
      if (getError) {
        throw getError;
      }
      perf.mark(`get-end-${i}`);
      perf.measure(`get-${i}`, `get-start-${i}`, `get-end-${i}`);
      // Use the result so the call cannot be optimized away.
      if (f.geometry === 3) {
        console.log(3);
      }
    });
  }
});

// Report once the process is about to exit, i.e. after every callback ran.
process.on('exit', () => {
  console.timeEnd('get');
  if (perf.nodeTiming.loopStart >= 0) {
    console.log('eventLoopUtilization', perf.eventLoopUtilization(elu).utilization);
  } else {
    console.log('eventLoop didn\'t start (100% sync code)');
  }
  // Avoid printing "NaNms" when no measure entry was observed.
  const perGet = events > 0 ? `${(duration / events).toFixed(4)}ms` : 'n/a';
  console.log('gets', events, 'per-get', perGet);
});
@mmomtchev
Copy link
Author

This gist opens 4 datasets on the same file in order to read it with 4 threads. One must pay 4 times the open cost to get a marginal improvement in speed, on the order of 20%:

open-0: 2.771s
count-0: 0.079ms
open-1: 2.848s
count-1: 0.056ms
open-2: 2.883s
count-2: 0.052ms
open-3: 2.888s
count-3: 0.037ms
get-0: 4.688s
get-1: 4.600s
get-2: 4.565s
get-3: 4.560s
eventLoopUtilization 0.03760332186211492
gets 662 per-get 3719.6174ms

There is also one severe problem with this approach that is general to Node and cannot be easily solved: the maximum number of threads is limited to UV_THREADPOOL_SIZE, which is 4 by default. This means that when launching a test that creates 1000 async contexts, Node/libuv will randomly choose 4 of those async contexts to run simultaneously. If they happen to be independent, that's good. If they are waiting on each other, they will just run sequentially, starving out the others and leaving you with no option other than to increase UV_THREADPOOL_SIZE.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment