Last active
February 18, 2022 16:02
-
-
Save dfkaye/0fbcae6cafc51f051c5ff289c0899845 to your computer and use it in GitHub Desktop.
K is a null-safe, NaN-safe, primitive-safe, object key value function for generating array diffs and intersections.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// 17 feb 2022 | |
// Answer to Jamon Holmgren's challenge tweet | |
// https://twitter.com/jamonholmgren/status/1494385356925390853 | |
// K is a null-safe, NaN-safe, primitive-safe, object key value function for | |
// generating array diffs and intersections. | |
// Changelog: | |
// Started with reduce, then filter/includes, then saw Jamon's follow-up tweet regarding | |
// excessive looping and added the Set part for include, exclude methods to support big | |
// datasets. | |
// Added efficiency tests. | |
// Added non-equal sized collection comparison tests. | |
// Cleaned up the typos. | |
// Added more surprising results tests. | |
// Added big vs small arrays containing all the same values. | |
// Added empty slots (array with holes) tests. | |
/* OK, start here */ | |
// Given: | |
var ar1 = [ 'a', 'd', 'e', 'f' ] | |
var ar2 = [ 'b', 'c', 'd', 'e' ] | |
/* | |
Give me three arrays with the elements in ar1 that are missing in ar2, vice versa, & elements in both: | |
let extra1 = [ 'a', 'f' ] | |
let both = [ 'd', 'e' ] | |
let extra2 = [ 'b', c' ] | |
*/ | |
/* Answer */ | |
/** | |
* @function K (of course) takes *any* value `a`, converts it to an Object, | |
* extracts its values, returns an API with include and exclude methods | |
* that do the same to the argument `b`, feed that to a Set, and return the | |
* diff and intersection arrays respectively. | |
* | |
* Bonus: | |
* Arguments `a` and `b` may be arrays, objects, strings, numbers, booleans, | |
* etc., and even `null`, `undefined`, or `NaN`. | |
*/ | |
function K(a) { | |
var A = Object.values(Object(a)); | |
return { | |
exclude(b) { | |
var B = new Set(Object.values(Object(b))); | |
return A.filter(value => !B.has(value)); | |
}, | |
include(b) { | |
var B = new Set(Object.values(Object(b))); | |
return A.filter(value => B.has(value)); | |
} | |
} | |
} | |
/* Small arrays */ | |
// exclude | |
var e1 = K(ar1).exclude(ar2) | |
var e2 = K(ar2).exclude(ar1) | |
// include | |
var i1 = K(ar1).include(ar2) | |
console.log(e1, e2, i1) | |
// [ 'a', 'f' ], [ 'b', c' ], [ 'd', 'e' ] | |
/* Big arrays */ | |
// hundred thousand | |
var big = Array(100000).fill(0) | |
// Offset by 1 so we get 2 tiny diffs, and a huge intersection | |
var big1 = Array.from(big, function(v, i) { return i + 1 }); | |
var big2 = Array.from(big, function(v, i) { return i + 2 }); | |
// things not in big1, should be a "1" | |
var be1 = K(big1).exclude(big2) | |
// things not in big2, should be a "100001" | |
var be2 = K(big2).exclude(big1) | |
// things in both, should be all but "1" and "100001" | |
// or [2...100000] which contains 99999 items | |
var bi1 = K(big2).include(big1) | |
console.log( be1.length, be2.length, bi1.length ) | |
// 1 1 99999 | |
/* Array-like objects */ | |
console.log( K({0: "first", 1: "second" }).exclude({0: "first"}) ) | |
// Array [ "second" ] | |
console.log( K({0: "first" }).exclude({0: "first", 1: "second"}) ) | |
// Array [ ] | |
console.log( K({0: "first"}).include({0: "first"}) ) | |
// Array [ "first" ] | |
/* Big array-like objects */ | |
// Reusing the hundred thousand sized arrays, and the offset by 1 | |
// so we get 2 tiny diffs, and a huge intersection | |
var o1 = {}; | |
big1.forEach((k, i) => o1[k] = i + 1) | |
var o2 = {}; | |
big2.forEach((k, i) => o2[k] = i + 2) | |
var oe1 = K(o1).exclude(o2) | |
var oe2 = K(o2).exclude(o1) | |
var oi1 = K(o1).include(o2) | |
console.log( oe1.length, oe2.length, oi1.length ) | |
// 1 1 99999 | |
/* Strings */ | |
var s1 = K(ar1.join("")).exclude(ar2.join("")) | |
var s2 = K(ar2.join("")).exclude(ar1.join("")) | |
var s3 = K(ar2.join("")).include(ar1.join("")) | |
console.log( s1, s2, s3 ) | |
// [ 'a', 'f' ], [ 'b', c' ], [ 'd', 'e' ] | |
/* Null-safe */ | |
var u = K().include() | |
console.log(u) | |
// Array [] | |
var n = K(null).include(null) | |
console.log(n) | |
// Array [] | |
/* NaN-safe */ | |
var nan = K(NaN).include(NaN) | |
console.log(nan) | |
// Array [] | |
/* Efficiency using Sets */ | |
// Time the big object operations | |
console.time("big") | |
K(o1).exclude(o2) | |
K(o2).exclude(o1) | |
K(o1).include(o2) | |
console.timeEnd("big") | |
// Ranges between 53-120ms, mostly under 90ms, for all 3 operations on | |
// 2 collections of 100,000 entries. | |
/* Non-equal sized collections */ | |
var sm = big1.slice(0,4) | |
var lg = big1; | |
// Note: lg includes all sm items. | |
console.log( K(lg).exclude(sm) ) | |
// Array(99996) [ 5, 6, ....] | |
console.log( K(sm).exclude(lg) ) | |
// Array [] | |
console.log( K(lg).include(sm) ) | |
// Array(4) [ 1, 2, 3, 4 ] | |
/* Other things that don't include each other because they are not indexical. */ | |
console.log( K(new Set([1,2,3,4])).include(new Map([[0,1], [1,2], [2,3], [3,4]])) ) | |
// Array [] | |
console.log( K(1234).include(1234) ) | |
// Array [] | |
console.log( K(true).include(false) ) | |
// Array [] | |
/* More surprising results with array-like objects... */ | |
// No surprise here. | |
console.log( K({ 0: "hello", 1: "world", length: 2 }).include(["hello"]) ); | |
// Array [ "hello" ] | |
// This includes on the value of the *own* property, `length` (2). | |
console.log( K({ 0: "hello", 1: "world", length: 2 }).include([ 2 ]) ); | |
// Array [ 2 ] | |
// This doesn't exclude on the value of the *own* property, `length` (2). | |
console.log( K({ 0: "hello", 1: "world", length: 2 }).exclude(["world"]) ); | |
// Array [ "hello", 2 ] | |
// This doesn't exclude any values because the search value is a string, | |
// whereas the index value is a number. | |
console.log( K({ 0: "hello", 1: "world", length: 2 }).exclude(["2"]) ); | |
// Array [ "hello", "world", 2 ] | |
// This excludes the numeric value of length. | |
console.log( K({ 0: "hello", 1: "world", length: 2 }).exclude([ 2 ]) ); | |
// Array [ "hello", "world" ] | |
/* De-duplication tests */ | |
// Operations do not de-duplicate the source array. | |
console.log( K([1,1,2,2,3,3,4,4,5,5,6,6]).exclude([3,4]) ); | |
// Array(8) [ 1, 1, 2, 2, 5, 5, 6, 6 ] | |
// Let's see how well things perform when all values are the same, | |
// in one big array and one little array. | |
// hundred thousand | |
var big = Array(100000).fill(0) | |
var P = Array.from(big, function(v, i) { return v }); | |
var Q = P.slice(0,1) | |
console.time("diffsize"); | |
var p1 = K(P).exclude(Q) | |
var p2 = K(Q).exclude(P) | |
var q1 = K(Q).include(P) | |
console.timeEnd("diffsize"); | |
// I get around 20ms consistently over all 3 operations | |
// This is where the de-duplication shows up. | |
// Everything is excluded from the source. | |
// Only one 0 is included in the intersection. | |
// And diffSize timing is consistently 20ms or faster. | |
console.log( p1, p2, q1 ); | |
// Array [], Array [], Array [ 0 ] | |
/* Arrays with holes */ | |
// Holes are skipped by the iterator methods. | |
// https://gist.github.com/dfkaye/917bad71ede52f4f2b033049118a79a1 | |
var h1 = [ "A", , 2, , 3] | |
// Array(5) [ "A", <1 empty slot>, 2, <1 empty slot>, 3 ] | |
var h2 = [ , 1, , 2, , "B"] | |
// Array(6) [ <1 empty slot>, 1, <1 empty slot>, 2, <1 empty slot>, "B" ] | |
console.log( K(h1).exclude(h2), K(h2).exclude(h1), K(h2).include(h1) ); | |
// Array [ "A", 3 ], Array [ 1, "B" ], Array [ 2 ] | |
// Note: To find empty slots in an array, use a for loop and check that | |
// the index is `in` the array: | |
for (var tests = [], i = 0; i < h1.length; i++) { | |
tests.push(i in h1) | |
} | |
console.log(tests); | |
// Array(5) [ true, false, true, false, true ] | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment