Skip to content

Instantly share code, notes, and snippets.

@dfkaye
Last active February 18, 2022 16:02
Show Gist options
  • Save dfkaye/0fbcae6cafc51f051c5ff289c0899845 to your computer and use it in GitHub Desktop.
Save dfkaye/0fbcae6cafc51f051c5ff289c0899845 to your computer and use it in GitHub Desktop.
K is a null-safe, NaN-safe, primitive-safe, object key value function for generating array diffs and intersections.
// 17 feb 2022
// Answer to Jamon Holmgren's challenge tweet
// https://twitter.com/jamonholmgren/status/1494385356925390853
// K is a null-safe, NaN-safe, primitive-safe, object key value function for
// generating array diffs and intersections.
// Changelog:
// Started with reduce, then filter/includes, then saw Jamon's follow-up tweet regarding
// excessive looping and added the Set part for include, exclude methods to support big
// datasets.
// Added efficiency tests.
// Added non-equal sized collection comparison tests.
// Cleaned up the typos.
// Added more surprising results tests.
// Added big vs small arrays containing all the same values.
// Added empty slots (array with holes) tests.
/* OK, start here */
// Given:
var ar1 = [ 'a', 'd', 'e', 'f' ]
var ar2 = [ 'b', 'c', 'd', 'e' ]
/*
Give me three arrays with the elements in ar1 that are missing in ar2, vice versa, & elements in both:
let extra1 = [ 'a', 'f' ]
let both = [ 'd', 'e' ]
let extra2 = [ 'b', c' ]
*/
/* Answer */
/**
* @function K (of course) takes *any* value `a`, converts it to an Object,
* extracts its values, returns an API with include and exclude methods
* that do the same to the argument `b`, feed that to a Set, and return the
* diff and intersection arrays respectively.
*
* Bonus:
* Arguments `a` and `b` may be arrays, objects, strings, numbers, booleans,
* etc., and even `null`, `undefined`, or `NaN`.
*/
function K(a) {
var A = Object.values(Object(a));
return {
exclude(b) {
var B = new Set(Object.values(Object(b)));
return A.filter(value => !B.has(value));
},
include(b) {
var B = new Set(Object.values(Object(b)));
return A.filter(value => B.has(value));
}
}
}
/* Small arrays */
// exclude
var e1 = K(ar1).exclude(ar2)
var e2 = K(ar2).exclude(ar1)
// include
var i1 = K(ar1).include(ar2)
console.log(e1, e2, i1)
// [ 'a', 'f' ], [ 'b', c' ], [ 'd', 'e' ]
/* Big arrays */
// hundred thousand
var big = Array(100000).fill(0)
// Offset by 1 so we get 2 tiny diffs, and a huge intersection
var big1 = Array.from(big, function(v, i) { return i + 1 });
var big2 = Array.from(big, function(v, i) { return i + 2 });
// things not in big1, should be a "1"
var be1 = K(big1).exclude(big2)
// things not in big2, should be a "100001"
var be2 = K(big2).exclude(big1)
// things in both, should be all but "1" and "100001"
// or [2...100000] which contains 99999 items
var bi1 = K(big2).include(big1)
console.log( be1.length, be2.length, bi1.length )
// 1 1 99999
/* Array-like objects */
console.log( K({0: "first", 1: "second" }).exclude({0: "first"}) )
// Array [ "second" ]
console.log( K({0: "first" }).exclude({0: "first", 1: "second"}) )
// Array [ ]
console.log( K({0: "first"}).include({0: "first"}) )
// Array [ "first" ]
/* Big array-like objects */
// Reusing the hundred thousand sized arrays, and the offset by 1
// so we get 2 tiny diffs, and a huge intersection
var o1 = {};
big1.forEach((k, i) => o1[k] = i + 1)
var o2 = {};
big2.forEach((k, i) => o2[k] = i + 2)
var oe1 = K(o1).exclude(o2)
var oe2 = K(o2).exclude(o1)
var oi1 = K(o1).include(o2)
console.log( oe1.length, oe2.length, oi1.length )
// 1 1 99999
/* Strings */
var s1 = K(ar1.join("")).exclude(ar2.join(""))
var s2 = K(ar2.join("")).exclude(ar1.join(""))
var s3 = K(ar2.join("")).include(ar1.join(""))
console.log( s1, s2, s3 )
// [ 'a', 'f' ], [ 'b', c' ], [ 'd', 'e' ]
/* Null-safe */
var u = K().include()
console.log(u)
// Array []
var n = K(null).include(null)
console.log(n)
// Array []
/* NaN-safe */
var nan = K(NaN).include(NaN)
console.log(nan)
// Array []
/* Efficiency using Sets */
// Time the big object operations
console.time("big")
K(o1).exclude(o2)
K(o2).exclude(o1)
K(o1).include(o2)
console.timeEnd("big")
// Ranges between 53-120ms, mostly under 90ms, for all 3 operations on
// 2 collections of 100,000 entries.
/* Non-equal sized collections */
var sm = big1.slice(0,4)
var lg = big1;
// Note: lg includes all sm items.
console.log( K(lg).exclude(sm) )
// Array(99996) [ 5, 6, ....]
console.log( K(sm).exclude(lg) )
// Array []
console.log( K(lg).include(sm) )
// Array(4) [ 1, 2, 3, 4 ]
/* Other things that don't include each other because they are not indexical. */
console.log( K(new Set([1,2,3,4])).include(new Map([[0,1], [1,2], [2,3], [3,4]])) )
// Array []
console.log( K(1234).include(1234) )
// Array []
console.log( K(true).include(false) )
// Array []
/* More surprising results with array-like objects... */
// No surprise here.
console.log( K({ 0: "hello", 1: "world", length: 2 }).include(["hello"]) );
// Array [ "hello" ]
// This includes on the value of the *own* property, `length` (2).
console.log( K({ 0: "hello", 1: "world", length: 2 }).include([ 2 ]) );
// Array [ 2 ]
// This doesn't exclude on the value of the *own* property, `length` (2).
console.log( K({ 0: "hello", 1: "world", length: 2 }).exclude(["world"]) );
// Array [ "hello", 2 ]
// This doesn't exclude any values because the search value is a string,
// whereas the index value is a number.
console.log( K({ 0: "hello", 1: "world", length: 2 }).exclude(["2"]) );
// Array [ "hello", "world", 2 ]
// This excludes the numeric value of length.
console.log( K({ 0: "hello", 1: "world", length: 2 }).exclude([ 2 ]) );
// Array [ "hello", "world" ]
/* De-duplication tests */
// Operations do not de-duplicate the source array.
console.log( K([1,1,2,2,3,3,4,4,5,5,6,6]).exclude([3,4]) );
// Array(8) [ 1, 1, 2, 2, 5, 5, 6, 6 ]
// Let's see how well things perform when all values are the same,
// in one big array and one little array.
// hundred thousand
var big = Array(100000).fill(0)
var P = Array.from(big, function(v, i) { return v });
var Q = P.slice(0,1)
console.time("diffsize");
var p1 = K(P).exclude(Q)
var p2 = K(Q).exclude(P)
var q1 = K(Q).include(P)
console.timeEnd("diffsize");
// I get around 20ms consistently over all 3 operations
// This is where the de-duplication shows up.
// Everything is excluded from the source.
// Only one 0 is included in the intersection.
// And diffSize timing is consistently 20ms or faster.
console.log( p1, p2, q1 );
// Array [], Array [], Array [ 0 ]
/* Arrays with holes */
// Holes are skipped by the iterator methods.
// https://gist.github.com/dfkaye/917bad71ede52f4f2b033049118a79a1
var h1 = [ "A", , 2, , 3]
// Array(5) [ "A", <1 empty slot>, 2, <1 empty slot>, 3 ]
var h2 = [ , 1, , 2, , "B"]
// Array(6) [ <1 empty slot>, 1, <1 empty slot>, 2, <1 empty slot>, "B" ]
console.log( K(h1).exclude(h2), K(h2).exclude(h1), K(h2).include(h1) );
// Array [ "A", 3 ], Array [ 1, "B" ], Array [ 2 ]
// Note: To find empty slots in an array, use a for loop and check that
// the index is `in` the array:
for (var tests = [], i = 0; i < h1.length; i++) {
tests.push(i in h1)
}
console.log(tests);
// Array(5) [ true, false, true, false, true ]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment