Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Non-comparative sorting with flexible chunking
function sort(array, radix, getDigit) {
const counts = new Uint32Array(radix);
const jobs = [{start:0, end:array.length, offset:0}];
while (jobs.length) {
const {start, end, offset} = jobs.pop();
counts.fill(0);
counts[null] = start;
const sorting = array.slice(start, end);
for (let index = 0; index < sorting.length; ++index) {
const value = getDigit(sorting[index], offset);
++counts[(value || value == 0) ? value : null];
}
let total = counts[null];
counts[null] = start;
for (let value = 0; value < counts.length; ++value) {
const count = counts[value];
if (!count)
continue;
counts[value] = total;
total += count;
if (count > 1)
jobs.push({start:counts[value], end:total, offset:offset + 1});
}
for (let index = 0; index < sorting.length; ++index) {
const value = getDigit(sorting[index], offset);
array[
counts[(value || value == 0) ? value : null]++
] = sorting[index];
}
}
}
//const array = ['why there', 'why', 'why hello', 'you', 'smelly'];
//['smelly', 'why', 'why hello', 'why there', 'you']
//these method calls behave identically to Array.prototype.sort()
//per two-byte, same as, but much slower than, Array.prototype.sort()
sort(
array,
1 << 16,
(element, offset) => (element.charCodeAt(offset))
);
//per byte, a little slower than Array.prototype.sort()
sort(
array,
1 << 8,
(element, offset) => {
if (offset / 2 >= element.length)
return null;
const code = element.charCodeAt(offset / 2);
return (offset % 2) ? code & 0xFF : code >> 8;
}
);
//these method calls sort the strings,
//but do not sort surrogate pairs as two different characters,
//instead respecting the intended single character value
//requires some preprocessing
const utf8 = new TextEncoder();
for (let index = 0; index < array.length; ++index) {
array[index] = {
string : array[index],
chars : utf8.encode(array[index])
}
}
//utf8 byte-by-byte sorting is identical to full-width 21bit Unicode sorting
//this is much faster than sorting with regular strings,
//even with the built-in sort, but the preprocess step above is slow,
//this more shows off the algorithm's support for other data types,
//or if you happen to be sorting utf8 buffers already
sort(
array,
1 << 8,
(element, offset) => (element.chars[offset])
);
//you can even sort per half-byte if you wish
sort(
array,
1 << 4,
(element, offset) => {
if (offset / 2 >= element.length)
return null;
const code = element.chars[offset / 2 | 0];
return (offset % 2) ? code & 0xF : code >> 4;
}
);
//and post-processing
for (let index = 0; index < array.length; ++index)
array[index] = array[index].string;
@Hashbrown777
Copy link
Author

Hashbrown777 commented Dec 7, 2021

performance

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment