Hashbrown777/1_sort.js

## 1_sort.js
function sort(array, radix, getDigit) {
	const counts = new Uint32Array(radix);
	const jobs = [{start:0, end:array.length, offset:0}];
	while (jobs.length) {
		const {start, end, offset} = jobs.pop();
		counts.fill(0);
		counts[null] = start;
		const sorting = array.slice(start, end);

		for (let index = 0; index < sorting.length; ++index) {
			const value = getDigit(sorting[index], offset);
			++counts[(value || value == 0) ? value : null];
		}

		let total = counts[null];
		counts[null] = start;
		for (let value = 0; value < counts.length; ++value) {
			const count = counts[value];
			if (!count)
				continue;

			counts[value] = total;
			total += count;

			if (count > 1)
				jobs.push({start:counts[value], end:total, offset:offset + 1});
		}

		for (let index = 0; index < sorting.length; ++index) {
			const value = getDigit(sorting[index], offset);
			array[
				counts[(value || value == 0) ? value : null]++
			] = sorting[index];
		}
	}
}

//const array = ['why there', 'why', 'why hello', 'you', 'smelly'];
//['smelly', 'why', 'why hello', 'why there', 'you']

## 2_tests.js
//these method calls behave identically to Array.prototype.sort()

//per two-byte, same as, but much slower than, Array.prototype.sort()
sort(
	array,
	1 << 16,
	(element, offset) => (element.charCodeAt(offset))
);

//per byte, a little slower than Array.prototype.sort()
sort(
	array,
	1 << 8,
	(element, offset) => {
		if (offset / 2 >= element.length)
			return null;
		const code = element.charCodeAt(offset / 2);
		return (offset % 2) ? code & 0xFF : code >> 8;
	}
);

## 3_unicodeTests.js
//these method calls sort the strings,
//but do not sort surrogate pairs as two different characters,
//instead respecting the intended single character value

//requires some preprocessing
const utf8 = new TextEncoder();
for (let index = 0; index < array.length; ++index) {
	array[index] = {
		string : array[index],
		chars  : utf8.encode(array[index])
	}
}

//utf8 byte-by-byte sorting is identical to full-width 21bit Unicode sorting
//this is much faster than sorting with regular strings,
//even with the built-in sort, but the preprocess step above is slow,
//this more shows off the algorithm's support for other data types,
//or if you happen to be sorting utf8 buffers already
sort(
	array,
	1 << 8,
	(element, offset) => (element.chars[offset])
);

//you can even sort per half-byte if you wish
sort(
	array,
	1 << 4,
	(element, offset) => {
		if (offset / 2 >= element.length)
			return null;
		const code = element.chars[offset / 2 | 0];
		return (offset % 2) ? code & 0xF : code >> 4;
	}
);

//and post-processing
for (let index = 0; index < array.length; ++index)
	array[index] = array[index].string;
	function sort(array, radix, getDigit) {
	const counts = new Uint32Array(radix);
	const jobs = [{start:0, end:array.length, offset:0}];
	while (jobs.length) {
	const {start, end, offset} = jobs.pop();
	counts.fill(0);
	counts[null] = start;
	const sorting = array.slice(start, end);

	for (let index = 0; index < sorting.length; ++index) {
	const value = getDigit(sorting[index], offset);
	++counts[(value \|\| value == 0) ? value : null];
	}

	let total = counts[null];
	counts[null] = start;
	for (let value = 0; value < counts.length; ++value) {
	const count = counts[value];
	if (!count)
	continue;

	counts[value] = total;
	total += count;

	if (count > 1)
	jobs.push({start:counts[value], end:total, offset:offset + 1});
	}

	for (let index = 0; index < sorting.length; ++index) {
	const value = getDigit(sorting[index], offset);
	array[
	counts[(value \|\| value == 0) ? value : null]++
	] = sorting[index];
	}
	}
	}

	//const array = ['why there', 'why', 'why hello', 'you', 'smelly'];
	//['smelly', 'why', 'why hello', 'why there', 'you']
	//these method calls behave identically to Array.prototype.sort()

	//per two-byte, same as, but much slower than, Array.prototype.sort()
	sort(
	array,
	1 << 16,
	(element, offset) => (element.charCodeAt(offset))
	);

	//per byte, a little slower than Array.prototype.sort()
	sort(
	array,
	1 << 8,
	(element, offset) => {
	if (offset / 2 >= element.length)
	return null;
	const code = element.charCodeAt(offset / 2);
	return (offset % 2) ? code & 0xFF : code >> 8;
	}
	);
	//these method calls sort the strings,
	//but do not sort surrogate pairs as two different characters,
	//instead respecting the intended single character value

	//requires some preprocessing
	const utf8 = new TextEncoder();
	for (let index = 0; index < array.length; ++index) {
	array[index] = {
	string : array[index],
	chars : utf8.encode(array[index])
	}
	}

	//utf8 byte-by-byte sorting is identical to full-width 21bit Unicode sorting
	//this is much faster than sorting with regular strings,
	//even with the built-in sort, but the preprocess step above is slow,
	//this more shows off the algorithm's support for other data types,
	//or if you happen to be sorting utf8 buffers already
	sort(
	array,
	1 << 8,
	(element, offset) => (element.chars[offset])
	);

	//you can even sort per half-byte if you wish
	sort(
	array,
	1 << 4,
	(element, offset) => {
	if (offset / 2 >= element.length)
	return null;
	const code = element.chars[offset / 2 \| 0];
	return (offset % 2) ? code & 0xF : code >> 4;
	}
	);

	//and post-processing
	for (let index = 0; index < array.length; ++index)
	array[index] = array[index].string;