Skip to content

Instantly share code, notes, and snippets.

@lovasoa
Last active January 21, 2021 19:56
Show Gist options
  • Save lovasoa/3361645 to your computer and use it in GitHub Desktop.
Save lovasoa/3361645 to your computer and use it in GitHub Desktop.
Compute the intersection of large arrays in javascript

WARNING : This code now lives in its own github repository: lovasoa/fast_array_intersect. It is also published on npm: fast_array_intersect

Array intersect

Fastest function to intersect a large number of big arrays in javascript, without dependencies

  • The compressed version is only 345 caracters long.
  • Faster than common libraries, even a large number of arrays, or on very big arrays. (See benchmarks)

Usage

array_intersect(array1, array2, ..., arrayN)

How it works

The idea is simple and comes from here: http://pioupioum.fr/developpement/javascript-array-intersection.html.

To make short story even shorter: It creates a javascript object, the keys of which are the elements of the smallest of the arrays we want to intersect.

function array_intersect() {
var i, all, shortest, nShortest, n, len, ret = [], obj={}, nOthers;
nOthers = arguments.length-1;
nShortest = arguments[0].length;
shortest = 0;
for (i=0; i<=nOthers; i++){
n = arguments[i].length;
if (n<nShortest) {
shortest = i;
nShortest = n;
}
}
for (i=0; i<=nOthers; i++) {
n = (i===shortest)?0:(i||shortest); //Read the shortest array first. Read the first array instead of the shortest
len = arguments[n].length;
for (var j=0; j<len; j++) {
var elem = arguments[n][j];
if(obj[elem] === i-1) {
if(i === nOthers) {
ret.push(elem);
obj[elem]=0;
} else {
obj[elem]=i;
}
}else if (i===0) {
obj[elem]=0;
}
}
}
return ret;
}
function array_intersect(){var a,b,c,d,e,f,g=[],h={},i;i=arguments.length-1;d=arguments[0].length;c=0;for(a=0;a<=i;a++){e=arguments[a].length;if(e<d){c=a;d=e}}for(a=0;a<=i;a++){e=a===c?0:a||c;f=arguments[e].length;for(var j=0;j<f;j++){var k=arguments[e][j];if(h[k]===a-1){if(a===i){g.push(k);h[k]=0}else{h[k]=a}}else if(a===0){h[k]=0}}}return g}
<html>
<head>
<title>Array intersection</title>
<script src="http://www.broofa.com/Tools/JSLitmus/JSLitmus.js"></script>
<script src="https://ajax.googleapis.com/ajax/libs/prototype/1.7.1.0/prototype.js"></script>
<script>
function array_intersect() {
var i, all, shortest, nShortest, n, len, ret = [], obj={}, nOthers;
nOthers = arguments.length-1;
nShortest = arguments[0].length;
shortest = 0;
for (i=0; i<=nOthers; i++){
n = arguments[i].length;
if (n<nShortest) {
shortest = i;
nShortest = n;
}
}
for (i=0; i<=nOthers; i++) {
n = (i===shortest)?0:(i||shortest); //Read the shortest array first. Read the first array instead of the shortest
len = arguments[n].length;
for (var j=0; j<len; j++) {
var elem = arguments[n][j];
if(obj[elem] === i-1) {
if(i === nOthers) {
ret.push(elem);
obj[elem]=0;
} else {
obj[elem]=i;
}
}else if (i===0) {
obj[elem]=0;
}
}
}
return ret;
}
function array_big_intersect () {
var args = Array.prototype.slice.call(arguments),
aLower = [],
aStack = [],
count,
i,
nArgs,
nLower,
oRest = {},
oTmp = {},
value,
compareArrayLength = function (a, b) {
return (a.length - b.length);
},
indexes = function (array, oStack) {
var i = 0,
value,
nArr = array.length,
oTmp = {};
for (; nArr > i; ++i) {
value = array[i];
if (!oTmp[value]) {
oStack[value] = 1 + (oStack[value] || 0); // counter
oTmp[value] = true;
}
}
return oStack;
};
args.sort(compareArrayLength);
aLower = args.shift();
nLower = aLower.length;
if (0 === nLower) {
return aStack;
}
nArgs = args.length;
i = nArgs;
while (i--) {
oRest = indexes(args.shift(), oRest);
}
for (i = 0; nLower > i; ++i) {
value = aLower[i];
count = oRest[value];
if (!oTmp[value]) {
if (nArgs === count) {
aStack.push(value);
oTmp[value] = true;
}
}
}
return aStack;
}
setTimeout(function(){
arrays = new Array();
strings = new Array();
var alph = "azertyuiopsdfghjklmwxcvbn,;:1234567890°³µ1";
arrays[0] = strings [0] = [1];
for (var i=1; i<=1000; i++) {
arrays[i] = new Array();
strings[i] = new Array();
for (var j=0; j<i/2; j++) {
var rnd = parseInt(42*Math.random());
arrays[i][j]=rnd;
var str="";
for (var k=0;k<Math.random();k+=0.05) {
str += alph[rnd%42]+"zizi";
}
strings[i].push(str);
}
}
smallArrays = [];
for (var i=0; i<8000; i++){
smallArrays[i] = [0,0,0,0,0,0,0,0,0,0].map(function(){
return parseInt(2*Math.random());
})
}
bigArrays = arrays.slice(900);
}, 10);
JSLitmus.test('2 arrays of 10 numbers : array_intersect', function(count) {
while (count--) {
array_intersect(smallArrays[0],smallArrays[1]);
}
});
JSLitmus.test('2 arrays of 10 numbers : array_big_intersect', function(count) {
while (count--) {
array_big_intersect(smallArrays[0],smallArrays[1]);
}
});
JSLitmus.test('2 arrays of 10 numbers : prototype.js', function(count) {
while (count--) {
smallArrays[0].intersect(smallArrays[1]);
}
});
JSLitmus.test('8000 arrays of 10 numbers : array_intersect', function(count) {
while (count--) {
array_intersect.apply(this,smallArrays);
}
});
JSLitmus.test('8000 arrays of 10 numbers : array_big_intersect', function(count) {
while (count--) {
array_big_intersect.apply(this,smallArrays);
}
});
JSLitmus.test('2 arrays of 500 numbers : array_intersect', function(count) {
while (count--) {
array_intersect(arrays[999], arrays[1000]);
}
});
JSLitmus.test('2 arrays of 500 numbers : array_big_intersect', function(count) {
while (count--) {
array_big_intersect(arrays[999], arrays[1000]);
}
});
JSLitmus.test('2 arrays of 500 numbers : PROTOTYPE.JS intersect', function(count) {
while (count--) {
arrays[999].intersect(arrays[1000]);
}
});
JSLitmus.test('2 arrays of 500 strings : array_intersect', function(count) {
while (count--) {
array_intersect(strings[999], strings[1000]);
}
});
JSLitmus.test('2 arrays of 500 strings : array_big_intersect', function(count) {
while (count--) {
array_big_intersect(strings[999], strings[1000]);
}
});
JSLitmus.test('2 arrays of 500 strings : PROTOTYPE.JS intersect', function(count) {
while (count--) {
strings[999].intersect(strings[1000]);
}
});
JSLitmus.test('100 arrays of ~500 numbers : array_intersect', function(count) {
while (count--) {
array_intersect.apply(this, bigArrays);
}
});
JSLitmus.test('100 arrays of ~500 numbers : array_big_intersect', function(count) {
while (count--) {
array_big_intersect.apply(this, bigArrays);
}
});
JSLitmus.test('300 arrays of numbers : array_intersect', function() {
array_intersect.apply(this, arrays.slice(200,300));
});
JSLitmus.test('300 arrays of numbers : array_big_intersect', function() {
array_big_intersect.apply(this, arrays.slice(200,300));
});
setTimeout(function() {
veryBigArrays = [[],[]];
for (var i=0;i<100000;i++){
veryBigArrays[0].push(parseInt(1000*Math.random()));
veryBigArrays[1].push(parseInt(1000*Math.random()))
}
}, 10);
JSLitmus.test('2 arrays of 100000 numbers : array_intersect', function(count) {
while (count--) {
array_intersect(veryBigArrays[0], veryBigArrays[1]);
}
});
JSLitmus.test('2 arrays of 100000 numbers : array_big_intersect', function(count) {
while (count--) {
array_big_intersect(veryBigArrays[0], veryBigArrays[1]);
}
});
</script>
</head>
<body>
</body>
</html>
@mebert
Copy link

mebert commented Aug 28, 2017

Do you have a license for the array_intersect function. We'd like to use it in some of our code, but we can't unless we know what license you've posted this under. It would be awesome if it were under MIT license :D

  • Thanks

@aclement30
Copy link

When comparing array of objects, it would be preferable to replace obj[elem] with some kind of stringified representation of the object (eg. obj[JSON.stringify(elem)]. Otherwise, the key will always be [object Object] for all objects and only the first intersecting object will be returned.

@lovasoa
Copy link
Author

lovasoa commented Sep 2, 2019

@aclement30 I just created a real github repository for this small piece of code, and published it on npm.

The updated version uses a Map instead of javascript object, and the user can provide a hash function such as JSON.stringify if they want to use objects.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment