Skip to content

Instantly share code, notes, and snippets.

@aiscool
Forked from sepans/README.md
Created January 17, 2016 02:58
Show Gist options
  • Save aiscool/8f7a9b13a07847e635cf to your computer and use it in GitHub Desktop.
Save aiscool/8f7a9b13a07847e635cf to your computer and use it in GitHub Desktop.
Implementation of Random Projection Locality Sensitive Hashing

Port of this python code in Javascript.

LSH from Wikipedia:

Locality-sensitive hashing (LSH) reduces the dimensionality of high-dimensional data. LSH hashes input items so that similar items map to the same “buckets” with high probability (the number of buckets being much smaller than the universe of possible input items). LSH differs from conventional and cryptographic hash functions because it aims to maximize the probability of a “collision” for similar items. Locality-sensitive hashing has much in common with data clustering and nearest neighbor search.

More explanation:

'user-strict'
var math = require('mathjs');
var bigInt = require('big-integer');
function getSignature(data, randomProj) {
var res = bigInt();
for(var i = 0; i< randomProj.length; i++) {
res = res.shiftLeft(1);
var p = randomProj.slice(i, i+1)[0];
var dotProduct = math.dot(p, data);
if(dotProduct >= 0) {
res = res.add(1);
}
}
return res;
}
/**
* Count number of 1's in binary number. very slow. apparently
* num.and is very slow in big-integer
*/
function nnz(num) {
if(num.equals(0)) {
return 0;
}
var res = 1;
//num = num & (num - 1)
num = num.and(num.subtract(1));
while(!num.equals(0)) {
res +=1;
num = num.and(num.subtract(1));
}
return res
}
/*
* Same as above but counting 1's in string format. Much faster
* than above.
*/
function nnzString(num) {
var numString = num.toString(2); //get bit string
var nnzCount = (numString.match(/1/g) || []).length; // count 1's
return nnzCount;
}
function angularSimilarity(a, b) {
var dotProduct = math.dot(a, b);
var sumA = math.pow(squaredSum(a), 0.5);
var sumB = math.pow(squaredSum(b), 0.5);
var cosine = dotProduct/(sumA * sumB);
var theta = math.acos(cosine);
return 1.0 - (theta/Math.PI);
}
function squaredSum(vector) {
return vector.reduce(function(sum, item) {
return sum + (item * item);
}, 0);
}
function main() {
var dim = 200; // number of dimensions per data;
var d = math.pow(2, 10); // number of bits per signature
var runs = 24; // repeat times
var avg = 0;
for(var i = 0; i < runs; i++) {
var user1 = math.random([1, dim], -1, 1)[0]; // a 1xdim vector
var user2 = math.random([1, dim], -1, 1)[0];
var randv = math.random([d, dim], -1, 1);
var r1 = getSignature(user1, randv);
var r2 = getSignature(user2, randv);
var xor = r1.xor(r2);
var nnzVal = nnzString(xor);
var hashSim = (d-nnzVal)/d;
var angularSim = angularSimilarity(user1, user2);
console.log('angular', angularSim,'hash', hashSim, Math.abs(angularSim - hashSim));
}
}
main();
{
"name": "lsh",
"version": "0.0.1",
"description": "Very basic Local Sensitive Hashing in JS",
"main": "lsh.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "sepans",
"license": "MIT",
"dependencies": {
"big-integer": "^1.6.10",
"mathjs": "^2.5.0"
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment