Skip to content

Instantly share code, notes, and snippets.

@JobLeonard
Last active November 16, 2021 08:40
Show Gist options
  • Save JobLeonard/eb482c82dfa0a5b78688109652ab8f68 to your computer and use it in GitHub Desktop.
Save JobLeonard/eb482c82dfa0a5b78688109652ab8f68 to your computer and use it in GitHub Desktop.
Radix sorts for TypedArrays, or arrays with values that would fit in 32-bit typed arrays. Outperforms built-in sort!
/*
By Job van der Zwan, CC0 2017
Inspired by Malte Skarupke's ska_sort[0], I wanted to see if radixSort sort is
also faster in JavaScript.
(Also, turns out I'm too dumb to implement ska_sort, so I used the more
simple LSB radixSort sort instead. It is based on an implementation by
Victor J. Duvanenko, but cleaned up to remove a lot of redundancies,
updated for ES6, and making use of TypedArrays for better performance.
If someone else could give a ska sort implemention a try, to see if
it might do even better, that would be awesome!)
For large uniform distributions, radixSort sort beats the built-in sort.
I haven't tested other types of inputs. See benchmarks here:
https://run.perf.zone/view/radixSort-sorts-for-typed-arrays-v2-1507847259416
https://run.perf.zone/view/radixSort-sort-32-bit-int-with-plain-array-test-1507850042653
I hope that for the typed arrays at least, this speed boost is temporary:
with the right algorithm, the built-in sort should be able to be faster
than these, due to better memory access and other reductions in overhead.
TODO: implement and benchmark a Float64 version (and faux-Uint64 version)
This will just be like Uint32, but reading out two 32-bit values at a
time and making 8 passes to cover all bytes.
PS: Skarupke mentions other radixSort sort implementations implementing a
Uint32 radixSort sort that makes three passes in 10-11-11 bit chunks. I have
implemented and tested this[1], but it turns out to be even slower than
the built-in version. Probably due to the large count buffer needed.
[0] https://probablydance.com/2016/12/27/i-wrote-a-faster-sorting-algorithm/
[1] https://run.perf.zone/view/radixSort-sort-variants-for-Uint32Array-1507830151297
*/
function radixSortUint32(input) {
const arrayConstr = input.length < (1 << 16) ?
Uint16Array :
Uint32Array;
const numberOfBins = 256 * 4;
let count = new arrayConstr(numberOfBins);
let output = new Uint32Array(input.length);
// count all bytes in one pass
for (let i = 0; i < input.length; i++) {
let val = input[i];
count[val & 0xFF]++;
count[((val >> 8) & 0xFF) + 256]++;
count[((val >> 16) & 0xFF) + 512]++;
count[((val >> 24) & 0xFF) + 768]++;
}
// create summed array
for (let j = 0; j < 4; j++) {
let t = 0,
sum = 0,
offset = j * 256;
for (let i = 0; i < 256; i++) {
t = count[i + offset];
count[i + offset] = sum;
sum += t;
}
}
for (let i = 0; i < input.length; i++) {
let val = input[i];
output[count[val & 0xFF]++] = val;
}
for (let i = 0; i < input.length; i++) {
let val = output[i];
input[count[((val >> 8) & 0xFF) + 256]++] = val;
}
for (let i = 0; i < input.length; i++) {
let val = input[i];
output[count[((val >> 16) & 0xFF) + 512]++] = val;
}
for (let i = 0; i < input.length; i++) {
let val = output[i];
input[count[((val >> 24) & 0xFF) + 768]++] = val;
}
return input;
}
function radixSortInt32(input) {
// make use of ArrayBuffer to "reinterpret cast"
// the Int32Array as a Uint32Array
let uinput = input.buffer ?
new Uint32Array(input.buffer):
Uint32Array.from(input);
// adjust to positive nrs
for (let i = 0; i < uinput.length; i++) {
uinput[i] += 0x80000000;
}
// re-use radixSortUint32
radixSortUint32(uinput);
// adjust back to signed nrs
for (let i = 0; i < uinput.length; i++) {
uinput[i] -= 0x80000000;
}
// for plain arrays, fake in-place behaviour
if (input.buffer === undefined){
for (let i = 0; i < input.length; i++){
input[i] = uinput[i];
}
}
return input;
}
function radixSortFloat32(input) {
// make use of ArrayBuffer to "reinterpret cast"
// the Float32Array as a Uint32Array
let uinput = input.buffer ?
new Uint32Array(input.buffer) :
new Uint32Array(Float32Array.from(input).buffer);
// Similar to radixSortInt32, but uses a more complicated trick
// See: http://stereopsis.com/radixSort.html
for (let i = 0; i < uinput.length; i++) {
if (uinput[i] & 0x80000000) {
uinput[i] ^= 0xFFFFFFFF;
} else {
uinput[i] ^= 0x80000000;
}
}
// re-use radixSortUint32
radixSortUint32(uinput);
// adjust back to original floating point nrs
for (let i = 0; i < uinput.length; i++) {
if (uinput[i] & 0x80000000) {
uinput[i] ^= 0x80000000;
} else {
uinput[i] ^= 0xFFFFFFFF;
}
}
if (input.buffer === undefined){
let floatTemp = new Float32Array(uinput.buffer);
for(let i = 0; i < input.length; i++){
input[i] = floatTemp[i];
}
}
return input;
}
function radixSortUint16(input) {
const arrayConstr = input.length < (1 << 16) ?
Uint16Array :
Uint32Array;
const numberOfBins = 256 * 2;
let count = new arrayConstr(numberOfBins);
let output = new Uint16Array(input.length);
// count all bytes in one pass
for (let i = 0; i < input.length; i++) {
let val = input[i];
count[val & 0xFF]++;
count[((val >> 8) & 0xFF) + 256]++;
}
// create summed array
let t = 0,
sum = 0;
for (let i = 0; i < 256; i++) {
t = count[i];
count[i] = sum;
sum += t;
}
sum = 0;
for (let i = 256; i < 512; i++) {
t = count[i];
count[i] = sum;
sum += t;
}
// first pass
for (let i = 0; i < input.length; i++) {
let val = input[i];
output[count[val & 0xFF]++] = val;
}
// second pass
for (let i = 0; i < input.length; i++) {
let val = output[i];
input[count[((val >> 8) & 0xFF) + 256]++] = val;
}
return input;
}
function radixSortInt16(input) {
let uinput = input.buffer ?
new Uint16Array(input.buffer):
Uint16Array.from(input);
for (let i = 0; i < uinput.length; i++) {
uinput[i] += 0x8000;
}
radixSortUint16(uinput);
for (let i = 0; i < uinput.length; i++) {
uinput[i] += 0x8000;
}
if (input.buffer === undefined){
for(let i = 0; i < input.length; i++){
input[i] = uinput[i];
}
}
return input;
}
function radixSortUint8(input) {
const arrayConstr = input.length < (1 << 16) ?
Uint16Array :
Uint32Array;
const numberOfBins = 256;
let count = new arrayConstr(numberOfBins);
let output = new Uint8Array(input.length);
for (let i = 0; i < input.length; i++) {
count[input[i]]++;
}
// create summed array
let t = 0,
sum = 0;
for (let i = 0; i < 256; i++) {
t = count[i];
count[i] = sum;
sum += t;
}
for (let i = 0; i < input.length; i++) {
let val = input[i];
output[count[val]++] = val;
}
input.set(output);
return input;
}
function radixSortInt8(input) {
let uinput = input.buffer ?
new Uint8Array(input.buffer) :
Uint8Array.from(input);
for (let i = 0; i < uinput.length; i++) {
uinput[i] += 0x80;
}
radixSortUint8(uinput);
for (let i = 0; i < uinput.length; i++) {
uinput[i] += 0x80;
}
if (input.buffer === undefined){
for(let i = 0; i < input.length; i++){
input[i] = uinput[i];
}
}
return input;
}
function shuffle(array) {
let i = array.length;
while (i--) {
// random swap
let t = array[i];
let j = Math.random() * i | 0;
array[i] = array[j];
array[j] = t;
}
}
function testSort(arrayConstr, sortFunc) {
let testArray = new arrayConstr(1000000);
for (let i = 0; i < testArray.length; i++) {
// TypedArrays truncate, so this works out fine
testArray[i] = (0x100000000 * Math.random());
}
testArray = testArray.slice(0);
let verifyArray = testArray.slice(0).sort();
shuffle(testArray);
sortFunc(testArray);
for (let i = 0; i < testArray.length; i++) {
if (testArray[i] !== verifyArray[i]) {
console.log(`Test[${i}]: ${testArray[i]}, verify[${i}]: ${verifyArray[i]}`);
return;
}
}
console.log('All elements in correct position');
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment