Skip to content

Instantly share code, notes, and snippets.

@lovasoa
Last active January 21, 2021 19:56
Show Gist options
  • Save lovasoa/3361645 to your computer and use it in GitHub Desktop.
Save lovasoa/3361645 to your computer and use it in GitHub Desktop.
Compute the intersection of large arrays in javascript

WARNING : This code now lives in its own github repository: lovasoa/fast_array_intersect. It is also published on npm: fast_array_intersect

Array intersect

Fastest function to intersect a large number of big arrays in javascript, without dependencies

  • The compressed version is only 345 caracters long.
  • Faster than common libraries, even a large number of arrays, or on very big arrays. (See benchmarks)

Usage

array_intersect(array1, array2, ..., arrayN)

How it works

The idea is simple and comes from here: http://pioupioum.fr/developpement/javascript-array-intersection.html.

To make short story even shorter: It creates a javascript object, the keys of which are the elements of the smallest of the arrays we want to intersect.

function array_intersect() {
var i, all, shortest, nShortest, n, len, ret = [], obj={}, nOthers;
nOthers = arguments.length-1;
nShortest = arguments[0].length;
shortest = 0;
for (i=0; i<=nOthers; i++){
n = arguments[i].length;
if (n<nShortest) {
shortest = i;
nShortest = n;
}
}
for (i=0; i<=nOthers; i++) {
n = (i===shortest)?0:(i||shortest); //Read the shortest array first. Read the first array instead of the shortest
len = arguments[n].length;
for (var j=0; j<len; j++) {
var elem = arguments[n][j];
if(obj[elem] === i-1) {
if(i === nOthers) {
ret.push(elem);
obj[elem]=0;
} else {
obj[elem]=i;
}
}else if (i===0) {
obj[elem]=0;
}
}
}
return ret;
}
function array_intersect(){var a,b,c,d,e,f,g=[],h={},i;i=arguments.length-1;d=arguments[0].length;c=0;for(a=0;a<=i;a++){e=arguments[a].length;if(e<d){c=a;d=e}}for(a=0;a<=i;a++){e=a===c?0:a||c;f=arguments[e].length;for(var j=0;j<f;j++){var k=arguments[e][j];if(h[k]===a-1){if(a===i){g.push(k);h[k]=0}else{h[k]=a}}else if(a===0){h[k]=0}}}return g}
<html>
<head>
<title>Array intersection</title>
<script src="http://www.broofa.com/Tools/JSLitmus/JSLitmus.js"></script>
<script src="https://ajax.googleapis.com/ajax/libs/prototype/1.7.1.0/prototype.js"></script>
<script>
function array_intersect() {
var i, all, shortest, nShortest, n, len, ret = [], obj={}, nOthers;
nOthers = arguments.length-1;
nShortest = arguments[0].length;
shortest = 0;
for (i=0; i<=nOthers; i++){
n = arguments[i].length;
if (n<nShortest) {
shortest = i;
nShortest = n;
}
}
for (i=0; i<=nOthers; i++) {
n = (i===shortest)?0:(i||shortest); //Read the shortest array first. Read the first array instead of the shortest
len = arguments[n].length;
for (var j=0; j<len; j++) {
var elem = arguments[n][j];
if(obj[elem] === i-1) {
if(i === nOthers) {
ret.push(elem);
obj[elem]=0;
} else {
obj[elem]=i;
}
}else if (i===0) {
obj[elem]=0;
}
}
}
return ret;
}
function array_big_intersect () {
var args = Array.prototype.slice.call(arguments),
aLower = [],
aStack = [],
count,
i,
nArgs,
nLower,
oRest = {},
oTmp = {},
value,
compareArrayLength = function (a, b) {
return (a.length - b.length);
},
indexes = function (array, oStack) {
var i = 0,
value,
nArr = array.length,
oTmp = {};
for (; nArr > i; ++i) {
value = array[i];
if (!oTmp[value]) {
oStack[value] = 1 + (oStack[value] || 0); // counter
oTmp[value] = true;
}
}
return oStack;
};
args.sort(compareArrayLength);
aLower = args.shift();
nLower = aLower.length;
if (0 === nLower) {
return aStack;
}
nArgs = args.length;
i = nArgs;
while (i--) {
oRest = indexes(args.shift(), oRest);
}
for (i = 0; nLower > i; ++i) {
value = aLower[i];
count = oRest[value];
if (!oTmp[value]) {
if (nArgs === count) {
aStack.push(value);
oTmp[value] = true;
}
}
}
return aStack;
}
setTimeout(function(){
arrays = new Array();
strings = new Array();
var alph = "azertyuiopsdfghjklmwxcvbn,;:1234567890°³µ1";
arrays[0] = strings [0] = [1];
for (var i=1; i<=1000; i++) {
arrays[i] = new Array();
strings[i] = new Array();
for (var j=0; j<i/2; j++) {
var rnd = parseInt(42*Math.random());
arrays[i][j]=rnd;
var str="";
for (var k=0;k<Math.random();k+=0.05) {
str += alph[rnd%42]+"zizi";
}
strings[i].push(str);
}
}
smallArrays = [];
for (var i=0; i<8000; i++){
smallArrays[i] = [0,0,0,0,0,0,0,0,0,0].map(function(){
return parseInt(2*Math.random());
})
}
bigArrays = arrays.slice(900);
}, 10);
JSLitmus.test('2 arrays of 10 numbers : array_intersect', function(count) {
while (count--) {
array_intersect(smallArrays[0],smallArrays[1]);
}
});
JSLitmus.test('2 arrays of 10 numbers : array_big_intersect', function(count) {
while (count--) {
array_big_intersect(smallArrays[0],smallArrays[1]);
}
});
JSLitmus.test('2 arrays of 10 numbers : prototype.js', function(count) {
while (count--) {
smallArrays[0].intersect(smallArrays[1]);
}
});
JSLitmus.test('8000 arrays of 10 numbers : array_intersect', function(count) {
while (count--) {
array_intersect.apply(this,smallArrays);
}
});
JSLitmus.test('8000 arrays of 10 numbers : array_big_intersect', function(count) {
while (count--) {
array_big_intersect.apply(this,smallArrays);
}
});
JSLitmus.test('2 arrays of 500 numbers : array_intersect', function(count) {
while (count--) {
array_intersect(arrays[999], arrays[1000]);
}
});
JSLitmus.test('2 arrays of 500 numbers : array_big_intersect', function(count) {
while (count--) {
array_big_intersect(arrays[999], arrays[1000]);
}
});
JSLitmus.test('2 arrays of 500 numbers : PROTOTYPE.JS intersect', function(count) {
while (count--) {
arrays[999].intersect(arrays[1000]);
}
});
JSLitmus.test('2 arrays of 500 strings : array_intersect', function(count) {
while (count--) {
array_intersect(strings[999], strings[1000]);
}
});
JSLitmus.test('2 arrays of 500 strings : array_big_intersect', function(count) {
while (count--) {
array_big_intersect(strings[999], strings[1000]);
}
});
JSLitmus.test('2 arrays of 500 strings : PROTOTYPE.JS intersect', function(count) {
while (count--) {
strings[999].intersect(strings[1000]);
}
});
JSLitmus.test('100 arrays of ~500 numbers : array_intersect', function(count) {
while (count--) {
array_intersect.apply(this, bigArrays);
}
});
JSLitmus.test('100 arrays of ~500 numbers : array_big_intersect', function(count) {
while (count--) {
array_big_intersect.apply(this, bigArrays);
}
});
JSLitmus.test('300 arrays of numbers : array_intersect', function() {
array_intersect.apply(this, arrays.slice(200,300));
});
JSLitmus.test('300 arrays of numbers : array_big_intersect', function() {
array_big_intersect.apply(this, arrays.slice(200,300));
});
setTimeout(function() {
veryBigArrays = [[],[]];
for (var i=0;i<100000;i++){
veryBigArrays[0].push(parseInt(1000*Math.random()));
veryBigArrays[1].push(parseInt(1000*Math.random()))
}
}, 10);
JSLitmus.test('2 arrays of 100000 numbers : array_intersect', function(count) {
while (count--) {
array_intersect(veryBigArrays[0], veryBigArrays[1]);
}
});
JSLitmus.test('2 arrays of 100000 numbers : array_big_intersect', function(count) {
while (count--) {
array_big_intersect(veryBigArrays[0], veryBigArrays[1]);
}
});
</script>
</head>
<body>
</body>
</html>
@aclement30
Copy link

When comparing array of objects, it would be preferable to replace obj[elem] with some kind of stringified representation of the object (eg. obj[JSON.stringify(elem)]. Otherwise, the key will always be [object Object] for all objects and only the first intersecting object will be returned.

@lovasoa
Copy link
Author

lovasoa commented Sep 2, 2019

@aclement30 I just created a real github repository for this small piece of code, and published it on npm.

The updated version uses a Map instead of javascript object, and the user can provide a hash function such as JSON.stringify if they want to use objects.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment