Skip to content

Instantly share code, notes, and snippets.

@ggondim
Created November 3, 2022 03:37
Show Gist options
  • Save ggondim/a0e1ebd5539cd261462d3d457ff01e31 to your computer and use it in GitHub Desktop.
Save ggondim/a0e1ebd5539cd261462d3d457ff01e31 to your computer and use it in GitHub Desktop.
ggondim's Bucket-Matrioska-Union Algorithm
// ggondim's Bucket-Matrioska-Union Algorithm
type Item = {
key: string,
properties: object,
setOrder: number,
setKey: string,
setVolume: number,
setSize: number,
setProperties: object,
};
type HashMap<T = null> = { [k: string]: T };
function exists(x: unknown) {
return typeof x !== 'undefined';
}
type Bucket = {
limit: number,
volume: number,
items: Item[],
itemMap: HashMap<number>,
setMap: HashMap<HashMap<number>>,
removedSetMap: HashMap,
};
type BDedupeAComparison = (b: Item, a: Item) => boolean;
function makeBuckets(limits: number[]) {
return limits.map(limit => ({
limit,
volume: 0,
items: [] as Item[],
itemMap: {} as HashMap<number>,
setMap: {} as HashMap<HashMap<number>>,
removedSetMap: {} as HashMap,
} as Bucket));
}
/**
* Generates a bucket plan (a map of sets that fills is a bucket, without remapping the original
* items array) for each bucket
* @param {number[]} limits Limits of buckets in ascending order.
* @param {Item[]} items Items ordered by `setOrder`.
* @param {BDedupeAComparison} bDedupeA Function to compare an existent set (A) with a new one (B).
* @returns {Bucket[]} A list of filled buckets.
*/
function fillBuckets2(
limits: number[],
items: Item[],
bDedupeA: BDedupeAComparison,
): Bucket[] {
const buckets = makeBuckets(limits);
for (let i = 0; i < items.length; i++) {
const item = items[i];
for (let b = 0; b < buckets.length; b++) {
const bucket = buckets[b];
if (bucket.removedSetMap[item.setKey]) {
// case: set should not replace any existent one
// action: do nothing
continue;
}
if (exists(bucket.itemMap[item.key]) && exists(bucket.setMap[item.setKey])) {
// case: duplicate items in same set
// action: do nothing
// case: item from another set
// action: just remap the item index
bucket.itemMap[item.key] = i;
continue;
}
if (exists(bucket.itemMap[item.key]) && !exists(bucket.setMap[item.setKey])) {
// case: existent item from another set
// actions:
// 1. check if existent should be replaced
// 2. check if new set could be added (new volume < limit)
// 3. remove existent set and add new one
// 4. add item to map
const existent = items[bucket.itemMap[item.key]];
// (1)
const removeExistent = bDedupeA(item, existent);
if (!removeExistent) {
// new set should never be added to any bucket
bucket.removedSetMap[item.setKey] = null;
continue;
}
// (2)
const volAfterDedupe = bucket.volume - existent.setVolume + item.setVolume;
if (volAfterDedupe > bucket.limit) {
// set should not be added to this bucket, but it could be to another
continue;
}
// (3)
bucket.removedSetMap[existent.setKey] = null;
Reflect.deleteProperty(bucket.setMap, existent.setKey);
bucket.setMap[item.setKey] = {} as HashMap<number>;
bucket.volume += item.setVolume;
// (4)
bucket.setMap[item.setKey][item.key] = i;
bucket.itemMap[item.key] = i;
}
if (!exists(bucket.itemMap[item.key]) && exists(bucket.setMap[item.setKey])) {
// case: new item of an added set
// action: just add item to map
bucket.setMap[item.setKey][item.key] = i;
bucket.itemMap[item.key] = i;
continue;
}
if (!exists(bucket.itemMap[item.key]) && !exists(bucket.setMap[item.setKey])) {
// case: new item and new set
// 1. check if new set could be added (new volume < limit)
// 2. add new set
// 3. add item to map
const volAfterAdd = bucket.volume + item.setVolume;
if (volAfterAdd > bucket.limit) {
// set should not be added to this bucket, but it could be to another
continue;
}
bucket.setMap[item.setKey] = {} as HashMap<number>;
bucket.volume += item.setVolume;
bucket.setMap[item.setKey][item.key] = i;
bucket.itemMap[item.key] = i;
}
}
}
return buckets;
}
/**
* Filters an item list by a bucket plan.
* @param bucket A bucket plan
* @param items Items to materialize from the bucket plan
*/
function* materializeBucket(
bucket: Bucket,
items: Item[],
) {
const setKeys = Object.keys(bucket.setMap);
for (let s = 0; s < setKeys.length; s++) {
const itemKeys = Object.keys(bucket.setMap[setKeys[s]]);
for (let i = 0; i < itemKeys.length; i++) {
yield items[bucket.setMap[setKeys[s]][itemKeys[i]]];
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment