Last active
November 16, 2021 08:40
-
-
Save JobLeonard/eb482c82dfa0a5b78688109652ab8f68 to your computer and use it in GitHub Desktop.
Radix sorts for TypedArrays, or arrays with values that would fit in 32-bit typed arrays. Outperforms built-in sort!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
By Job van der Zwan, CC0 2017 | |
Inspired by Malte Skarupke's ska_sort[0], I wanted to see if radixSort sort is | |
also faster in JavaScript. | |
(Also, turns out I'm too dumb to implement ska_sort, so I used the more | |
simple LSB radixSort sort instead. It is based on an implementation by | |
Victor J. Duvanenko, but cleaned up to remove a lot of redundancies, | |
updated for ES6, and making use of TypedArrays for better performance. | |
If someone else could give a ska sort implemention a try, to see if | |
it might do even better, that would be awesome!) | |
For large uniform distributions, radixSort sort beats the built-in sort. | |
I haven't tested other types of inputs. See benchmarks here: | |
https://run.perf.zone/view/radixSort-sorts-for-typed-arrays-v2-1507847259416 | |
https://run.perf.zone/view/radixSort-sort-32-bit-int-with-plain-array-test-1507850042653 | |
I hope that for the typed arrays at least, this speed boost is temporary: | |
with the right algorithm, the built-in sort should be able to be faster | |
than these, due to better memory access and other reductions in overhead. | |
TODO: implement and benchmark a Float64 version (and faux-Uint64 version) | |
This will just be like Uint32, but reading out two 32-bit values at a | |
time and making 8 passes to cover all bytes. | |
PS: Skarupke mentions other radixSort sort implementations implementing a | |
Uint32 radixSort sort that makes three passes in 10-11-11 bit chunks. I have | |
implemented and tested this[1], but it turns out to be even slower than | |
the built-in version. Probably due to the large count buffer needed. | |
[0] https://probablydance.com/2016/12/27/i-wrote-a-faster-sorting-algorithm/ | |
[1] https://run.perf.zone/view/radixSort-sort-variants-for-Uint32Array-1507830151297 | |
*/ | |
function radixSortUint32(input) { | |
const arrayConstr = input.length < (1 << 16) ? | |
Uint16Array : | |
Uint32Array; | |
const numberOfBins = 256 * 4; | |
let count = new arrayConstr(numberOfBins); | |
let output = new Uint32Array(input.length); | |
// count all bytes in one pass | |
for (let i = 0; i < input.length; i++) { | |
let val = input[i]; | |
count[val & 0xFF]++; | |
count[((val >> 8) & 0xFF) + 256]++; | |
count[((val >> 16) & 0xFF) + 512]++; | |
count[((val >> 24) & 0xFF) + 768]++; | |
} | |
// create summed array | |
for (let j = 0; j < 4; j++) { | |
let t = 0, | |
sum = 0, | |
offset = j * 256; | |
for (let i = 0; i < 256; i++) { | |
t = count[i + offset]; | |
count[i + offset] = sum; | |
sum += t; | |
} | |
} | |
for (let i = 0; i < input.length; i++) { | |
let val = input[i]; | |
output[count[val & 0xFF]++] = val; | |
} | |
for (let i = 0; i < input.length; i++) { | |
let val = output[i]; | |
input[count[((val >> 8) & 0xFF) + 256]++] = val; | |
} | |
for (let i = 0; i < input.length; i++) { | |
let val = input[i]; | |
output[count[((val >> 16) & 0xFF) + 512]++] = val; | |
} | |
for (let i = 0; i < input.length; i++) { | |
let val = output[i]; | |
input[count[((val >> 24) & 0xFF) + 768]++] = val; | |
} | |
return input; | |
} | |
function radixSortInt32(input) { | |
// make use of ArrayBuffer to "reinterpret cast" | |
// the Int32Array as a Uint32Array | |
let uinput = input.buffer ? | |
new Uint32Array(input.buffer): | |
Uint32Array.from(input); | |
// adjust to positive nrs | |
for (let i = 0; i < uinput.length; i++) { | |
uinput[i] += 0x80000000; | |
} | |
// re-use radixSortUint32 | |
radixSortUint32(uinput); | |
// adjust back to signed nrs | |
for (let i = 0; i < uinput.length; i++) { | |
uinput[i] -= 0x80000000; | |
} | |
// for plain arrays, fake in-place behaviour | |
if (input.buffer === undefined){ | |
for (let i = 0; i < input.length; i++){ | |
input[i] = uinput[i]; | |
} | |
} | |
return input; | |
} | |
function radixSortFloat32(input) { | |
// make use of ArrayBuffer to "reinterpret cast" | |
// the Float32Array as a Uint32Array | |
let uinput = input.buffer ? | |
new Uint32Array(input.buffer) : | |
new Uint32Array(Float32Array.from(input).buffer); | |
// Similar to radixSortInt32, but uses a more complicated trick | |
// See: http://stereopsis.com/radixSort.html | |
for (let i = 0; i < uinput.length; i++) { | |
if (uinput[i] & 0x80000000) { | |
uinput[i] ^= 0xFFFFFFFF; | |
} else { | |
uinput[i] ^= 0x80000000; | |
} | |
} | |
// re-use radixSortUint32 | |
radixSortUint32(uinput); | |
// adjust back to original floating point nrs | |
for (let i = 0; i < uinput.length; i++) { | |
if (uinput[i] & 0x80000000) { | |
uinput[i] ^= 0x80000000; | |
} else { | |
uinput[i] ^= 0xFFFFFFFF; | |
} | |
} | |
if (input.buffer === undefined){ | |
let floatTemp = new Float32Array(uinput.buffer); | |
for(let i = 0; i < input.length; i++){ | |
input[i] = floatTemp[i]; | |
} | |
} | |
return input; | |
} | |
function radixSortUint16(input) { | |
const arrayConstr = input.length < (1 << 16) ? | |
Uint16Array : | |
Uint32Array; | |
const numberOfBins = 256 * 2; | |
let count = new arrayConstr(numberOfBins); | |
let output = new Uint16Array(input.length); | |
// count all bytes in one pass | |
for (let i = 0; i < input.length; i++) { | |
let val = input[i]; | |
count[val & 0xFF]++; | |
count[((val >> 8) & 0xFF) + 256]++; | |
} | |
// create summed array | |
let t = 0, | |
sum = 0; | |
for (let i = 0; i < 256; i++) { | |
t = count[i]; | |
count[i] = sum; | |
sum += t; | |
} | |
sum = 0; | |
for (let i = 256; i < 512; i++) { | |
t = count[i]; | |
count[i] = sum; | |
sum += t; | |
} | |
// first pass | |
for (let i = 0; i < input.length; i++) { | |
let val = input[i]; | |
output[count[val & 0xFF]++] = val; | |
} | |
// second pass | |
for (let i = 0; i < input.length; i++) { | |
let val = output[i]; | |
input[count[((val >> 8) & 0xFF) + 256]++] = val; | |
} | |
return input; | |
} | |
function radixSortInt16(input) { | |
let uinput = input.buffer ? | |
new Uint16Array(input.buffer): | |
Uint16Array.from(input); | |
for (let i = 0; i < uinput.length; i++) { | |
uinput[i] += 0x8000; | |
} | |
radixSortUint16(uinput); | |
for (let i = 0; i < uinput.length; i++) { | |
uinput[i] += 0x8000; | |
} | |
if (input.buffer === undefined){ | |
for(let i = 0; i < input.length; i++){ | |
input[i] = uinput[i]; | |
} | |
} | |
return input; | |
} | |
function radixSortUint8(input) { | |
const arrayConstr = input.length < (1 << 16) ? | |
Uint16Array : | |
Uint32Array; | |
const numberOfBins = 256; | |
let count = new arrayConstr(numberOfBins); | |
let output = new Uint8Array(input.length); | |
for (let i = 0; i < input.length; i++) { | |
count[input[i]]++; | |
} | |
// create summed array | |
let t = 0, | |
sum = 0; | |
for (let i = 0; i < 256; i++) { | |
t = count[i]; | |
count[i] = sum; | |
sum += t; | |
} | |
for (let i = 0; i < input.length; i++) { | |
let val = input[i]; | |
output[count[val]++] = val; | |
} | |
input.set(output); | |
return input; | |
} | |
function radixSortInt8(input) { | |
let uinput = input.buffer ? | |
new Uint8Array(input.buffer) : | |
Uint8Array.from(input); | |
for (let i = 0; i < uinput.length; i++) { | |
uinput[i] += 0x80; | |
} | |
radixSortUint8(uinput); | |
for (let i = 0; i < uinput.length; i++) { | |
uinput[i] += 0x80; | |
} | |
if (input.buffer === undefined){ | |
for(let i = 0; i < input.length; i++){ | |
input[i] = uinput[i]; | |
} | |
} | |
return input; | |
} | |
function shuffle(array) { | |
let i = array.length; | |
while (i--) { | |
// random swap | |
let t = array[i]; | |
let j = Math.random() * i | 0; | |
array[i] = array[j]; | |
array[j] = t; | |
} | |
} | |
function testSort(arrayConstr, sortFunc) { | |
let testArray = new arrayConstr(1000000); | |
for (let i = 0; i < testArray.length; i++) { | |
// TypedArrays truncate, so this works out fine | |
testArray[i] = (0x100000000 * Math.random()); | |
} | |
testArray = testArray.slice(0); | |
let verifyArray = testArray.slice(0).sort(); | |
shuffle(testArray); | |
sortFunc(testArray); | |
for (let i = 0; i < testArray.length; i++) { | |
if (testArray[i] !== verifyArray[i]) { | |
console.log(`Test[${i}]: ${testArray[i]}, verify[${i}]: ${verifyArray[i]}`); | |
return; | |
} | |
} | |
console.log('All elements in correct position'); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment