Skip to content

Instantly share code, notes, and snippets.

@sepans
Last active August 3, 2023 20:18
Show Gist options
  • Save sepans/419d413f786b27872b34 to your computer and use it in GitHub Desktop.
Save sepans/419d413f786b27872b34 to your computer and use it in GitHub Desktop.
Implementation of Random Projection Locality Sensitive Hashing

Port of this python code in Javascript.

LSH from Wikipedia:

Locality-sensitive hashing (LSH) reduces the dimensionality of high-dimensional data. LSH hashes input items so that similar items map to the same “buckets” with high probability (the number of buckets being much smaller than the universe of possible input items). LSH differs from conventional and cryptographic hash functions because it aims to maximize the probability of a “collision” for similar items. Locality-sensitive hashing has much in common with data clustering and nearest neighbor search.

More explanation:

'user-strict'
var math = require('mathjs');
var bigInt = require('big-integer');
function getSignature(data, randomProj) {
var res = bigInt();
for(var i = 0; i< randomProj.length; i++) {
res = res.shiftLeft(1);
var p = randomProj.slice(i, i+1)[0];
var dotProduct = math.dot(p, data);
if(dotProduct >= 0) {
res = res.add(1);
}
}
return res;
}
/**
* Count number of 1's in binary number. very slow. apparently
* num.and is very slow in big-integer
*/
function nnz(num) {
if(num.equals(0)) {
return 0;
}
var res = 1;
//num = num & (num - 1)
num = num.and(num.subtract(1));
while(!num.equals(0)) {
res +=1;
num = num.and(num.subtract(1));
}
return res
}
/*
* Same as above but counting 1's in string format. Much faster
* than above.
*/
function nnzString(num) {
var numString = num.toString(2); //get bit string
var nnzCount = (numString.match(/1/g) || []).length; // count 1's
return nnzCount;
}
function angularSimilarity(a, b) {
var dotProduct = math.dot(a, b);
var sumA = math.pow(squaredSum(a), 0.5);
var sumB = math.pow(squaredSum(b), 0.5);
var cosine = dotProduct/(sumA * sumB);
var theta = math.acos(cosine);
return 1.0 - (theta/Math.PI);
}
function squaredSum(vector) {
return vector.reduce(function(sum, item) {
return sum + (item * item);
}, 0);
}
function main() {
var dim = 200; // number of dimensions per data;
var d = math.pow(2, 10); // number of bits per signature
var runs = 24; // repeat times
var avg = 0;
for(var i = 0; i < runs; i++) {
var user1 = math.random([1, dim], -1, 1)[0]; // a 1xdim vector
var user2 = math.random([1, dim], -1, 1)[0];
var randv = math.random([d, dim], -1, 1);
var r1 = getSignature(user1, randv);
var r2 = getSignature(user2, randv);
var xor = r1.xor(r2);
var nnzVal = nnzString(xor);
var hashSim = (d-nnzVal)/d;
var angularSim = angularSimilarity(user1, user2);
console.log('angular', angularSim,'hash', hashSim, Math.abs(angularSim - hashSim));
}
}
main();
{
"name": "lsh",
"version": "0.0.1",
"description": "Very basic Local Sensitive Hashing in JS",
"main": "lsh.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "sepans",
"license": "MIT",
"dependencies": {
"big-integer": "^1.6.10",
"mathjs": "^2.5.0"
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment