Created
December 7, 2021 12:45
-
-
Save Hashbrown777/78aaa2fecee29e3a7e142a642fd19b48 to your computer and use it in GitHub Desktop.
Non-comparative sorting with flexible chunking
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function sort(array, radix, getDigit) { | |
const counts = new Uint32Array(radix); | |
const jobs = [{start:0, end:array.length, offset:0}]; | |
while (jobs.length) { | |
const {start, end, offset} = jobs.pop(); | |
counts.fill(0); | |
counts[null] = start; | |
const sorting = array.slice(start, end); | |
for (let index = 0; index < sorting.length; ++index) { | |
const value = getDigit(sorting[index], offset); | |
++counts[(value || value == 0) ? value : null]; | |
} | |
let total = counts[null]; | |
counts[null] = start; | |
for (let value = 0; value < counts.length; ++value) { | |
const count = counts[value]; | |
if (!count) | |
continue; | |
counts[value] = total; | |
total += count; | |
if (count > 1) | |
jobs.push({start:counts[value], end:total, offset:offset + 1}); | |
} | |
for (let index = 0; index < sorting.length; ++index) { | |
const value = getDigit(sorting[index], offset); | |
array[ | |
counts[(value || value == 0) ? value : null]++ | |
] = sorting[index]; | |
} | |
} | |
} | |
//const array = ['why there', 'why', 'why hello', 'you', 'smelly']; | |
//['smelly', 'why', 'why hello', 'why there', 'you'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//these method calls behave identically to Array.prototype.sort() | |
//per two-byte, same as, but much slower than, Array.prototype.sort() | |
sort( | |
array, | |
1 << 16, | |
(element, offset) => (element.charCodeAt(offset)) | |
); | |
//per byte, a little slower than Array.prototype.sort() | |
sort( | |
array, | |
1 << 8, | |
(element, offset) => { | |
if (offset / 2 >= element.length) | |
return null; | |
const code = element.charCodeAt(offset / 2); | |
return (offset % 2) ? code & 0xFF : code >> 8; | |
} | |
); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//these method calls sort the strings, | |
//but do not sort surrogate pairs as two different characters, | |
//instead respecting the intended single character value | |
//requires some preprocessing | |
const utf8 = new TextEncoder(); | |
for (let index = 0; index < array.length; ++index) { | |
array[index] = { | |
string : array[index], | |
chars : utf8.encode(array[index]) | |
} | |
} | |
//utf8 byte-by-byte sorting is identical to full-width 21bit Unicode sorting | |
//this is much faster than sorting with regular strings, | |
//even with the built-in sort, but the preprocess step above is slow, | |
//this more shows off the algorithm's support for other data types, | |
//or if you happen to be sorting utf8 buffers already | |
sort( | |
array, | |
1 << 8, | |
(element, offset) => (element.chars[offset]) | |
); | |
//you can even sort per half-byte if you wish | |
sort( | |
array, | |
1 << 4, | |
(element, offset) => { | |
if (offset / 2 >= element.length) | |
return null; | |
const code = element.chars[offset / 2 | 0]; | |
return (offset % 2) ? code & 0xF : code >> 4; | |
} | |
); | |
//and post-processing | |
for (let index = 0; index < array.length; ++index) | |
array[index] = array[index].string; |
Author
Hashbrown777
commented
Dec 7, 2021
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment