zbentley/anagram.js

## anagram.js
/*
Optimized potential-anagram identifier function.

Runs in O(N) time on the supplied string, with several o(N) optimizations as well.

Consumes at worst half of the supplied string's space in memory (additionally), plus overhead.

This algorithm uses two paths: a reusable array of integer values (it could just as well be a bitfield,
but that probably won't save any performance since memory has to be written in whole words anyway)
with a known, fixed size: it will store the first, most common, 512 characters of the alphabet and
their counts. Since unicode is a thing, if we come across a character with a code greater than 512,
we fall back to using an old-school dynamically-sized array. The dynamically-sized fallback can't be
stored globally and re-used like the fast-path array can be, since that would risk a memory leak: after
repeated invocations, its worst-case size would be <array element pointer overhead + boolean size> *
<the number of unicode code points>, which is rather a lot.
*/
const _COUNTS = new Uint8Array(512);
function is_anagram(string) {
	_COUNTS.fill(0);
	let odds = 0, i = string.length;
	const localCounts = [];
	while (i--) {

		const char = string.codePointAt(i);
		if ( char > _COUNTS.length ) { // Slow path using a dictionary of booleans for unicodeish things:
			if ( localCounts[char] ) {
				odds--;
				localCounts[char] = false;
			} else {
				localCounts[char] = true;
				odds++;
			}
		} else { // Fast path, using a bitfield-ish array:
			if ( _COUNTS[char] ) {
				odds--;
				_COUNTS[char] = 0;
			} else {
				_COUNTS[char] = 1;
				odds++;
			}
		}
		// If we have more odd-numbered letters than there are left in the string, we can bail now.
		// On face, this seems like a silly o(N) shortcut, and adds a bit of loop overhead. However,
		// this check reduces the memory needed to identify non-anagram strings containing exclusively
		// >512-code-point characters by half.
		if ( odds > i + 1 ) {
			return false;
		}
	}
	return odds < 2;
}
	/*
	Optimized potential-anagram identifier function.

	Runs in O(N) time on the supplied string, with several o(N) optimizations as well.

	Consumes at worst half of the supplied string's space in memory (additionally), plus overhead.

	This algorithm uses two paths: a reusable array of integer values (it could just as well be a bitfield,
	but that probably won't save any performance since memory has to be written in whole words anyway)
	with a known, fixed size: it will store the first, most common, 512 characters of the alphabet and
	their counts. Since unicode is a thing, if we come across a character with a code greater than 512,
	we fall back to using an old-school dynamically-sized array. The dynamically-sized fallback can't be
	stored globally and re-used like the fast-path array can be, since that would risk a memory leak: after
	repeated invocations, its worst-case size would be <array element pointer overhead + boolean size> *
	<the number of unicode code points>, which is rather a lot.
	*/
	const _COUNTS = new Uint8Array(512);
	function is_anagram(string) {
	_COUNTS.fill(0);
	let odds = 0, i = string.length;
	const localCounts = [];
	while (i--) {

	const char = string.codePointAt(i);
	if ( char > _COUNTS.length ) { // Slow path using a dictionary of booleans for unicodeish things:
	if ( localCounts[char] ) {
	odds--;
	localCounts[char] = false;
	} else {
	localCounts[char] = true;
	odds++;
	}
	} else { // Fast path, using a bitfield-ish array:
	if ( _COUNTS[char] ) {
	odds--;
	_COUNTS[char] = 0;
	} else {
	_COUNTS[char] = 1;
	odds++;
	}
	}
	// If we have more odd-numbered letters than there are left in the string, we can bail now.
	// On face, this seems like a silly o(N) shortcut, and adds a bit of loop overhead. However,
	// this check reduces the memory needed to identify non-anagram strings containing exclusively
	// >512-code-point characters by half.
	if ( odds > i + 1 ) {
	return false;
	}
	}
	return odds < 2;
	}