Skip to content

Instantly share code, notes, and snippets.

@ibsusu
Last active November 14, 2022 10:45
Show Gist options
  • Save ibsusu/0c424504b206949f809516b04e82de5d to your computer and use it in GitHub Desktop.
Save ibsusu/0c424504b206949f809516b04e82de5d to your computer and use it in GitHub Desktop.
arraybuffer back objects with webworker sorting
/// benchmark.js
async function benchmark() {
let namePromises = [];
let namePromises2 = [];
for (let i=0;i<100000;++i){
namePromises.push(utils.getRandomName()); // you'll need to make your own
namePromises2.push(utils.getRandomName()); // same here
}
let names = await Promise.all(namePromises);
let names2 = await Promise.all(namePromises2);
window.items = [];
console.log("creating items");
let start = window.performance.now();
for(let i=0;i<names.length;++i){
items.push({otherKey: "cool", name:`${names[i]}${names2[i]}`});
}
console.log(`finished creating items in ${(window.performance.now() - start)/1000} seconds`);
let bbos = [];
console.log("creating bbos");
start = window.performance.now();
let descriptors = {
name: Bbo.UTF8String(90),
otherKey: Bbo.UTF8String(10)
};
const descriptorSize = window.structSize(descriptors);
const buf = new ArrayBuffer(descriptorSize*names.length);
window.boar = new Boar(buf, descriptors);
const buf2 = new ArrayBuffer(descriptorSize*names.length);
window.boar2 = new Boar(buf2, descriptors, workerGlue);
console.log("boars", boar, boar2);
for(let i=0;i<names.length;++i){
// const bbo = new Bbo(buf, {
// name: Bbo.UTF8String(90),
// otherKey: Bbo.UTF8String(10)
// }, descriptorSize*i);
const bbo = boar[i];
bbo.name = `${names[i]}${names2[i]}`;
bbo.otherKey = "cool";
const bbo2 = boar2[i];
bbo2.name = `${names[i]}${names2[i]}`;
bbo2.otherKey = "cool";
}
console.log(`finished creating bbos in ${(window.performance.now() - start)/1000} seconds`);
function compare(itemA, itemB){
if (itemA.name < itemB.name){
return -1;
}
if (itemA.name > itemB.name){
return 1;
}
return 0;
}
console.log("starting the item sort");
start = window.performance.now();
items.sort(compare);
console.log(`finished sorting in ${(window.performance.now() - start)/1000} seconds`);
console.log("starting the bbo sort");
start = window.performance.now();
await boar.sort(compare);
console.log(`finished sorting in ${(window.performance.now() - start)/1000} seconds`);
console.log("starting the bbo2 sort");
start = window.performance.now();
await boar2.sort("name");
console.log(`finished sorting in ${(window.performance.now() - start)/1000} seconds`);
}
/**
* Copyright 2020 Google Inc. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// `globalThis` polyfill
(function () {
if (typeof globalThis === "object") return;
Object.prototype.__defineGetter__("__magic__", function () {
return this;
});
__magic__.globalThis = __magic__; // lolwat
delete Object.prototype.__magic__;
})();
// Like `isNaN` but returns `true` for symbols.
function betterIsNaN(s) {
if (typeof s === "symbol") {
return true;
}
return isNaN(s);
}
function structSize(descriptors) {
let stride = 0;
for (const { size } of Object.values(descriptors)) {
stride += size;
}
return stride;
}
function ArrayOfBufferBackedObjects(
buffer,
descriptors,
worker,
{ byteOffset = 0, length = 0 } = {}
) {
console.log("arrayofbufferbackedobjects", buffer, descriptors, worker);
let dataView = new DataView(buffer, byteOffset);
// Accumulate the size of one struct
let stride = 0;
// this is a
// Copy
descriptors = Object.assign({}, descriptors);
for (const [name, descriptor] of Object.entries(descriptors)) {
// Copy second layer and add offset property
descriptors[name] = Object.assign({}, descriptor, { offset: stride });
stride += descriptor.size;
}
if (!length) {
length = Math.floor((buffer.byteLength - byteOffset) / stride);
}
return new Proxy(new Array(length), {
has(target, propName) {
// The underlying array is hole-y, but we want to pretend that it is not.
// So we need to return `true` for all indices so that `map` et al. work
// as expected.
if (!betterIsNaN(propName)) {
return propName < length;
}
if (propName === "buffer") {
return true;
}
if (propName === "sort") {
return true;
}
return propName in target;
},
get(target, propName, proxy) {
if (propName === "buffer") {
return buffer;
}
if (propName === "dataView"){
return dataView;
}
if (propName === "byteOffset"){
return byteOffset;
}
if (propName === "size") {
function sizer(keyName) {
return descriptors[keyName].size;
}
return sizer;
}
if (propName === "worker"){
return worker;
}
if (propName === "descriptors") {
let descs = {};
for (let key in descriptors) {
descs[key] = descriptors[key].size;
}
return descs;
}
if (propName === "sort") {
async function sorter(compareFunctionOrKey) {
// everything is sorted by the byte order of the first key in the descriptor
// or the key of the descriptor that's passed in.
// if the input is a string then we can do the work on a webworker
// the assumption is that it's a key in the descriptor
// console.log("sorter branch check,", compareFunctionOrKey, typeof compareFunctionOrKey, worker);
if((!compareFunctionOrKey || typeof compareFunctionOrKey === 'string') && worker){
const key = compareFunctionOrKey;
let keyOffset = 0;
let keySize = 0;
let firstDescriptor;
// get the keysize and offset
for(const descriptor in descriptors){
if(!firstDescriptor){
firstDescriptor = descriptor;
}
// default to the first one if the key doesn't exist
if(!key){
key = descriptor;
keySize = descriptors[descriptor].size;
break;
}
// increment the offset
if(key !== descriptor){
keyOffset += descriptors[descriptor].size;
continue;
}
// get the keysize
keySize = descriptors[descriptor].size;
break;
}
// console.log("before worker request");
// request sorting from the worker, we don't have a keySize if they
if(keySize !== 0){
buffer = await worker.sort(this.buffer, keySize, keyOffset, stride);
dataView = new DataView(buffer, byteOffset);
for(let i=0;i<target.length;++i){
delete target[i];
// console.log("looping through target", i);
}
return;
}
}
function compare(itemA, itemB){
if (itemA[firstDescriptor] < itemB[firstDescriptor]){
return -1;
}
if (itemA[firstDescriptor] > itemB[firstDescriptor]){
return 1;
}
return 0;
}
compareFunctionOrKey = compareFunctionOrKey ?? compare;
// at this point we need to keep the work on the current thread
// because it's not possible to send functions to the webworker without running eval. we don't run eval.
quickSort(proxy, compareFunctionOrKey);
}
return sorter;
}
if (betterIsNaN(propName)) {
let prop = target[propName];
if (typeof prop === "function") {
prop = prop.bind(proxy);
}
return prop;
}
// getting an array index for normal indexing
const idx = parseInt(propName);
const itemOffset = idx * stride;
// Just like real arrays, we return `undefined`
// outside the boundaries.
if (idx >= target.length) {
return undefined;
}
if (!target[idx]) {
target[idx] = new Proxy(
new Uint8Array(dataView.buffer, itemOffset + byteOffset, stride),
{
has(target, propName) {
return propName in target || propName in descriptors;
},
get(target, propName, proxy) {
// if it's a property native to Uint8Array return it
if (target[propName]) {
return target[propName];
}
// without this we just get a json stringified Uint8Array.
if (propName === "toJSON") {
return () => {
const abstractObject = {};
for (const name in descriptors) {
const descriptor = descriptors[name];
// skipping reserved
if (!descriptor.get) {
continue;
}
abstractObject[name] = descriptor.get(
target.subarray(
descriptor.offset,
descriptor.offset + descriptor.size
)
);
}
return abstractObject;
};
}
// check for the descriptor now
let descriptor = descriptors[propName];
if (!descriptor) {
return undefined;
}
// return descriptor.get(new DataView(target.buffer, itemOffset, stride), descriptor.offset);
return descriptor.get(
target.subarray(
descriptor.offset,
descriptor.offset + descriptor.size
)
);
},
set(target, propName, value, receiver) {
if (propName === "underlyingArray") {
target.set(value);
return true;
}
let descriptor = descriptors[propName];
if (!descriptor) {
return false;
}
descriptor.set(
target.subarray(
descriptor.offset,
descriptor.offset + descriptor.size
),
value
);
return true;
},
}
);
}
return target[idx];
},
set(target, propName, value, proxy) {
// if(propName === 'buffer'){
// console.log("receiving set buffer", propName, value);
// // this is a full reset,
// target.buffer = value;
// return true;
// }
if(propName === 'worker'){
target.worker = value;
return true;
}
const idx = parseInt(propName);
const itemOffset = idx * stride;
if (!target[idx]) {
target[idx] = new Proxy(
new Uint8Array(dataView.buffer, itemOffset + byteOffset, stride),
{
has(target, propName) {
return propName in target || propName in descriptors;
},
get(target, propName, proxy) {
// if it's a property native to Uint8Array return it
if (target[propName]) {
return target[propName];
}
// without this we just get a json stringified Uint8Array.
if (propName === "toJSON") {
return () => {
const abstractObject = {};
for (const name in descriptors) {
const descriptor = descriptors[name];
// skipping reserved
if (!descriptor.get) {
continue;
}
abstractObject[name] = descriptor.get(
target.subarray(
descriptor.offset,
descriptor.offset + descriptor.size
)
);
}
return abstractObject;
};
}
// check for the descriptor now
let descriptor = descriptors[propName];
if (!descriptor) {
return undefined;
}
// return descriptor.get(new DataView(target.buffer, itemOffset, stride), descriptor.offset);
return descriptor.get(
target.subarray(
descriptor.offset,
descriptor.offset + descriptor.size
)
);
},
set(target, propName, value, receiver) {
let descriptor = descriptors[propName];
if (!descriptor) {
return false;
}
descriptor.set(
target.subarray(
descriptor.offset,
descriptor.offset + descriptor.size
),
value
);
return true;
},
}
);
}
target[idx].underlyingArray = value;
return true;
},
});
}
function BufferBackedObject(
buffer,
descriptors,
{ byteOffset = 0 } = {}
) {
return ArrayOfBufferBackedObjects(buffer, descriptors, { byteOffset })[0];
}
[
"Uint16",
"Uint32",
"Int16",
"Int32",
"Float32",
"Float64",
"BigInt64",
"BigUint64",
].forEach((name) => {
BufferBackedObject[name] = ({ endianess: endianness = "big" } = {}) => {
if (endianness !== "big" && endianness !== "little") {
throw Error("Endianness needs to be either 'big' or 'little'");
}
const littleEndian = endianness === "little";
return {
size: globalThis[`${name}Array`].BYTES_PER_ELEMENT,
get: (u8Array) => {
return new DataView(
u8Array.buffer,
u8Array.byteOffset,
u8Array.byteLength
)[`get${name}`](0, littleEndian);
},
set: (u8Array, value) => {
new DataView(u8Array.buffer, u8Array.byteOffset, u8Array.byteLength)[
`set${name}`
](0, value, littleEndian);
},
};
};
});
BufferBackedObject.Uint8 = () => ({
size: 1,
get: (u8Array) => u8Array[0],
set: (u8Array, value) => (u8Array[0] = value),
});
BufferBackedObject.Int8 = () => ({
size: 1,
get: (u8Array) =>
new DataView(u8Array.buffer, u8Array.byteOffset, 1).getInt8(),
set: (u8Array, value) => (u8Array[0] = value),
});
BufferBackedObject.ArrayBuffer = (size) => ({
size,
get: (u8Array) => u8Array.subarray(0, size),
set: (u8Array, value) => u8Array.set(new Uint8Array(value)),
});
BufferBackedObject.NestedBufferBackedObject = (descriptors) => {
const size = structSize(descriptors);
return {
size,
get: (u8Array) =>
new ArrayOfBufferBackedObjects(u8Array.buffer, descriptors, {
byteOffset: u8Array.byteOffset,
length: 1,
})[0],
set: (u8Array, value) => {
throw Error("Can’t set an entire struct");
},
};
};
BufferBackedObject.NestedArrayOfBufferBackedObjects = (length, descriptors) => {
const size = structSize(descriptors) * length;
return {
size,
get: (u8Array) =>
new ArrayOfBufferBackedObjects(u8Array.buffer, descriptors, {
byteOffset: u8Array.byteOffset,
length,
}),
set: (u8Array, value) => {
throw Error("Can’t set an entire array");
},
};
};
BufferBackedObject.UTF8String = (maxBytes) => {
return {
size: maxBytes,
get: (u8Array) => {
return new TextDecoder().decode(u8Array).replace(/\u0000+$/, "");
},
set: (u8Array, value) => {
const encoding = new TextEncoder().encode(value);
u8Array.fill(0);
u8Array.set(encoding.subarray(0, maxBytes));
},
};
};
BufferBackedObject.reserved = (size) => ({ size });
// sorting function. timsort is better but more complicated. I just need this to work.
function quickSort(arr, compare, left = 0, right = arr.length - 1) {
let len = arr.length;
let index;
if (len > 1) {
index = partition(arr, compare, left, right);
if (left < index - 1) {
quickSort(arr, compare, left, index - 1);
}
if (index < right) {
quickSort(arr, compare, index, right);
}
}
return arr;
}
function partition(arr, compare, left, right) {
let middle = Math.floor((right + left) / 2);
let pivot = arr[middle];
let i = left;
let j = right;
while (i <= j) {
while (compare(arr[i], pivot) < 0) {
i++;
}
while (compare(arr[j], pivot) > 0) {
j--;
}
let buf = new ArrayBuffer(arr[0].length);
let swap = new Uint8Array(arr[0].length);
if (i <= j) {
swap.set(arr[i]);
arr[i] = arr[j];
arr[j] = swap;
i++;
j--;
}
}
return i;
}
//worker.js
importScripts(
"comlink.js",
"buffer-backed-objects-with-webworker-sorting.js" // not really used on the worker side.
);
// quicksort in uint8arrays
function quickSort(arr, length, keySize, keyOffset, stride, left = 0, right = length - 1) {
let index;
if (length > 1) {
index = partition(arr, length, keySize, keyOffset, stride, left, right);
if (left < index - 1) {
quickSort(arr, length, keySize, keyOffset, stride, left, index - 1);
}
if (index < right) {
quickSort(arr, length, keySize, keyOffset, stride, index, right);
}
}
return arr;
}
function partition(arr, length, keySize, keyOffset, stride, left, right) {
let middle = Math.floor((right + left) / 2);
// let pivot = arr[middle];
let decoder = new TextDecoder();
let pivot = decoder.decode(arr.subarray(middle*stride+keyOffset, middle*stride+keySize));
let i = left;
let j = right;
while (i <= j) {
// while (arr[i*stride] < pivot) {
while (decoder.decode(arr.subarray(i*stride+keyOffset, i*stride+keySize)) < pivot) {
i++;
}
// while (arr[j*stride] > pivot) {
while (decoder.decode(arr.subarray(j*stride+keyOffset, j*stride+keySize)) > pivot) {
j--;
}
let swap = new Uint8Array(stride);
if (i <= j) {
swap.set(arr.subarray(i*stride, i*stride+stride));
arr.set(arr.subarray(j*stride, j*stride+stride), i*stride);
arr.set(swap, j*stride);
i++;
j--;
}
}
return i;
}
const worky = {};
worky.sort = function(buffer, keySize, keyOffset, stride) {
// console.log("worky sorting", buffer, keySize, keyOffset, stride);
const u8Array = new Uint8Array(buffer);
const length = u8Array.length / stride;
quickSort(u8Array, length, keySize, keyOffset, stride, 0, length-1);
return Comlink.transfer(buffer, [buffer]);
};
Comlink.expose(worky);
//workerGlue.js
const workerGlue = {};
const worker = new Worker('worker.js');
const comWorker = Comlink.wrap(worker);
workerGlue.sort = async function(buffer, keySize, keyOffset, stride){
let sameBuf = await comWorker.sort(Comlink.transfer(buffer, [buffer]), keySize, keyOffset, stride);
return sameBuf;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment